library(microViz)
## 
## microViz version 0.9.3 - Copyright (C) 2021 David Barnett
## * Website: https://david-barnett.github.io/microViz/
## * Useful? For citation info, run: citation('microViz')
## * Silence: suppressPackageStartupMessages(library(microViz))
pacman::p_load(tidyverse, phyloseq, magrittr, janitor, microbiome, knitr, lubridate, naniar, readxl, ggplot2, ggpubr)

library(dendextend)
## Warning: package 'dendextend' was built under R version 4.1.2
## Registered S3 method overwritten by 'dendextend':
##   method     from 
##   rev.hclust vegan
## 
## ---------------------
## Welcome to dendextend version 1.17.1
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags: 
##   https://stackoverflow.com/questions/tagged/dendextend
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## Attaching package: 'dendextend'
## The following object is masked from 'package:ggpubr':
## 
##     rotate
## The following object is masked from 'package:stats':
## 
##     cutree
library(vegan)
## Loading required package: permute
## Warning: package 'permute' was built under R version 4.1.2
## 
## Attaching package: 'permute'
## The following object is masked from 'package:dendextend':
## 
##     shuffle
## Loading required package: lattice
## This is vegan 2.6-6.1
## 
## Attaching package: 'vegan'
## The following object is masked from 'package:microbiome':
## 
##     diversity
ps_full <- readRDS("/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/rds_files/ps_IMProveCF_patientsAndControls_Run1-23_18102023.rds")

source("/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/IMMProveCF/functions_full.R")

visit_sum_palette <- c("black", "#C3BC3FFF", "#6388B4FF", "#BB7693FF", "#55AD89FF", "#8176AA")

dom_palette <- c(Streptococcus="#69b3a2", Staphylococcus = "#8175AA", Fusobacterium="#6FB899FF", Prevotella_7="#31A1B3FF", Rothia="#027B8EFF", Pseudomonas="#EE6AA7", Eikenella="#94D0C0FF", Haemophilus="#CCB22BFF", Achromobacter="#9F8F12FF", Gemella= "#97CFD0" , Moraxella= "#6FB899", `missing sample` = "#CFCFCF", `Burkholderia-Caballeronia-Paraburkholderia` = "#E5C494")
ps_full_relab <- transform_sample_counts(ps_full, function(x) x/sum(x)*100)

#subset materials
ps_full_throat <- subset_samples(ps_full_relab, material== "Throat")
ps_full_stool <- subset_samples(ps_full_relab, material== "Stool")

ps_full_throat_glom <- tax_glom(ps_full_throat, taxrank = "Genus")
ps_full_stool_glom <- tax_glom(ps_full_stool, taxrank = "Genus")

1 Analysis on most abundant taxa per patient in throat

#Get top taxa per patient
#find.top.taxa2 is sourced from functions.R
top.throat<- find.top.taxa2(ps_full_throat_glom, "Genus",1)
top.throat$Species<- NULL

rslt <- top.throat[, "taxa"]
dd <- matrix(unlist(rslt), nrow=1)
colnames(dd) <- rownames(top.throat)
top.throat <- t(dd)

top.throat_df <- data.frame(x1 = row.names(top.throat), top.throat)%>%
  mutate(dominantGenus = top.throat)
top.throat_df$top.throat<- NULL

# add clinical data to p

##Add dominant Genus to ps_full_throat_glom sample data
ps_full_throat_glom_j <- microViz::ps_join(ps_full_throat_glom, top.throat_df, by = "x1")

##Add dominant Genus to ps_full_throat sample data
ps_full_throat_j <- microViz::ps_join(ps_full_throat, top.throat_df, by = "x1")

# Control's dominant taxa
summary(as_factor(sample_data(ps_full_throat_j)$dominantGenus[sample_data(ps_full_throat_j)$project=="IMMPIMMP"]))
## Streptococcus   Veillonella     Neisseria Fusobacterium   Haemophilus 
##            32             2             3             1             1
# Patients's dominant taxa
summary(as_factor(sample_data(ps_full_throat_j)$dominantGenus[sample_data(ps_full_throat_j)$project!="IMMPIMMP"]))
##        Prevotella_7       Streptococcus      Stomatobaculum             Gemella 
##                  32                 109                   1                   2 
##       Fusobacterium        Leptotrichia         Veillonella         Actinomyces 
##                   9                  10                  15                   1 
##         Haemophilus           Neisseria              Rothia      Actinobacillus 
##                   4                  13                   1                   2 
##          Prevotella        Oribacterium Lachnoanaerobaculum                NA's 
##                   3                   1                   1                   9
 ### plot principal component analysis by dominant Genus
PCA_all_visit_throat_dominantGenus <- plot_ordination(ps_full_throat_j, ordinate(ps_full_throat_j, "MDS"), color = "dominantGenus", shape="visit_sum")

xlabels <- c("0","3","6-12","15-18","21-24","Control")

PCA_all_visit_throat_dominantGenus+
  geom_line(aes(group=id.x), color="grey")+
  geom_point(size = 4)+
  ggtitle("Variation by dominant Genus")+
  scale_color_manual(values = dom_palette)+
  scale_shape_manual(values = c(15, 16, 17, 18, 12, 23),labels = xlabels)+
  theme_bw()+
  theme(text=element_text(size=24), legend.position = "right", legend.text = element_text(size=16), legend.title = element_text(size=18))+
  guides(color = guide_legend(title = "dominant genus"))+
   guides(shape = guide_legend(title = "months from treatment start"))

### plot principal component analysis by visit_sum
PCA_all_visit_throat_dominantGenus <- plot_ordination(ps_full_throat_j, ordinate(ps_full_throat_j, "MDS"), color = "visit_sum", shape="project")

project_label <- c("Control","CF")

xlabels <- c("0", "3", "6-12", "15-18", "21-24", "Control")

p1 <- PCA_all_visit_throat_dominantGenus +
  geom_point(size = 4, alpha = 0.7) +
  scale_color_manual(values = visit_sum_palette, labels = xlabels) +
  scale_shape_manual(values = c(15, 16), labels = project_label) +
  theme_classic() +
  theme(text = element_text(size = 18), legend.position = "none", legend.text = element_text(size = 16), legend.title = element_blank()) +
  guides(color = guide_legend(title = "Months from treatment start", title.position = "left", ncol = 2))+
  stat_ellipse()
p1

p1_leg <- PCA_all_visit_throat_dominantGenus +
  geom_point(size = 4, alpha = 0.7) +
  scale_color_manual(values = visit_sum_palette, labels = xlabels) +
  scale_shape_manual(values = c(15, 16), labels = project_label) +
  theme_classic() +
  theme(text = element_text(size = 16), legend.position = "bottom", legend.text = element_text(size = 16)) +
  guides(color = guide_legend(title = "Months from treatment start", title.position = "left", ncol = 6),shape = guide_legend(title = "", title.position = "left", ncol = 2))+
  stat_ellipse()
p1_leg

#ggsave("/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/figures/beta_throat_controls.png", width = 10, height = 6)

2 create density plots for axis 1 and 2

# Perform PCoA
ord_obj <- ordinate(ps_full_throat_j, "MDS")

# Extract coordinates from PCoA
pcoa_coords <- ord_obj$vectors

# Create a data frame for ggplot
df <- data.frame(x1 = pcoa_coords[, 1], x2 = pcoa_coords[, 2], visit_sum = sample_data(ps_full_throat_j)$visit_sum,age_y = sample_data(ps_full_throat_j)$age_y,sex= sample_data(ps_full_throat_j)$sex,id.x= sample_data(ps_full_throat_j)$id.x)

# Create a named vector for custom labels
custom_labels <- c("1" = "0", 
                   "2" = "3", 
                   "3-5" = "6-12", 
                   "6-7" = "15-18", 
                   "8-10" = "21-24", 
                   "Control" = "Control")
# Create density plots on the x and y axes
plot_x_th <- ggplot(df, aes(x = x1, fill = as.factor(visit_sum))) +
  geom_density(alpha = 0.5) +
  theme_classic() +
  #clean_theme()+
  scale_x_continuous(breaks = c(-0.3, 0, 0.3), labels = c("-0.3", "0", "0.3"))+
  scale_fill_manual(values = visit_sum_palette, labels = xlabels)+
  theme(legend.position = "none", text = element_text(size = 18), legend.title = element_blank())


# Define the common y-axis limits
y_axis_limits <- c(0, 2.5)  # Adjust these values as needed

# Density plot with fixed y-axis range
axis1_den <- ggplot(df, aes(x = x1, fill = as.factor(visit_sum))) +
  geom_density(alpha = 0.8) +
  theme_classic() +
  scale_x_continuous(breaks = c(-0.3, 0, 0.3), labels = c("-0.3", "0", "0.3")) +
    scale_fill_manual(values = visit_sum_palette, labels = xlabels) +
  theme(
    legend.position = "none",
    text = element_text(size = 18),
    legend.title = element_blank(),
    strip.text.y = element_text(angle = 0)  # Rotate the facet labels
  ) +
  facet_grid(rows = vars(visit_sum), labeller =  as_labeller(custom_labels)) +
  ylim(y_axis_limits)+
  labs( x = "Axis 1")
axis1_den

# Boxplot comparing x between the 5 groups
axis1_bp <- ggplot(df, aes(x = as.factor(visit_sum), y = x1, fill = as.factor(visit_sum))) +
  geom_boxplot(alpha=0.8) +
  theme_classic() +
  scale_x_discrete(labels = xlabels) +
  scale_fill_manual(values = visit_sum_palette, labels = xlabels) +
  theme(
    legend.position = "none",
    text = element_text(size = 18),
    legend.title = element_blank()
  ) +
  labs(x = "Months from tt start", y = "Axis 1")
axis1_bp

plot_y_th <- ggplot(df, aes(x = x2, fill = as.factor(visit_sum))) +
  geom_density(alpha = 0.5) +
  theme_classic() +
  #clean_theme()+
  xlim(c(-0.6, 0.4))+
  scale_fill_manual(values = visit_sum_palette, labels = xlabels)+
  theme(legend.position = "none",text = element_text(size = 18), legend.title = element_blank())+
  coord_flip()

# Define the common y-axis limits
y_axis_limits <- c(0, 3.5)  # Adjust these values as needed

# Density plot with fixed y-axis range
axis2_den <- ggplot(df, aes(x = x2, fill = as.factor(visit_sum))) +
  geom_density(alpha = 0.8) +
  theme_classic() +
  scale_x_continuous(breaks = c(-0.6, 0, 0.4), labels = c("-0.6", "0", "0.4")) +
  scale_fill_manual(values = visit_sum_palette, labels = xlabels) +
  theme(
    legend.position = "none",
    text = element_text(size = 18),
    legend.title = element_blank(),
    strip.text.y = element_text(angle = 0)  # Rotate the facet labels
  ) +
  facet_grid(rows = vars(visit_sum), labeller =  as_labeller(custom_labels)) +
  ylim(y_axis_limits)+
  labs(x = "Axis 2")
axis2_den

# Boxplot comparing x between the 5 groups
axis2_bp <- ggplot(df, aes(x = as.factor(visit_sum), y = x2, fill = as.factor(visit_sum))) +
  geom_boxplot(alpha=0.8) +
  theme_classic() +
  scale_x_discrete(labels = xlabels) +
  scale_fill_manual(values = visit_sum_palette, labels = xlabels) +
  theme(
    legend.position = "none",
    text = element_text(size = 18),
    legend.title = element_blank(),
  ) +
  labs(x = "Months from tt start", y = "Axis 2")
axis2_bp

# Extract the legend. Returns a gtable
leg <- get_legend(p1_leg)

# Convert to a ggplot and print
legend <- as_ggplot(leg)

# Arranging the plot

axis <- ggarrange(axis1_den, axis2_den, axis1_bp, axis2_bp, 
          ncol = 2, nrow = 2,  align = "hv", 
          widths = c(1, 1),
          common.legend = F)
## Warning: Graphs cannot be vertically aligned unless the axis parameter is set.
## Placing graphs unaligned.
## Warning: Graphs cannot be horizontally aligned unless the axis parameter is
## set. Placing graphs unaligned.
axis

throat_density <- ggarrange(plot_x_th, NULL, p1, plot_y_th, 
          ncol = 2, nrow = 2,  align = "hv", 
          widths = c(2, 1), heights = c(1, 2),
          common.legend = F)
throat_density

# Load required libraries
library(tidyverse)
library(lmerTest)
## Loading required package: lme4
## Warning: package 'lme4' was built under R version 4.1.2
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
library(ggpubr)

# Define the custom labels
xlabels <- c("0", "3", "6-12", "15-18", "21-24", "Control")

# Convert visit_sum to factor with specified levels
df<- df%>%
  mutate(visit_sum = factor(visit_sum, levels = c("1", "2", "3-5", "6-7", "8-10", "Control")))

# Define the function for linear model and boxplot
lm_corrected_pvalues_boxplot <- function(x, ylabel) {
  
  print(ylabel) # x is the diversity parameter I am interested in
  
  # Fit the linear mixed-effects model
  lm <- summary(lmerTest::lmer(x ~ visit_sum + age_y + sex + (1|id.x), data = df))
  
  # Extract coefficients and compute FDR
  coefs <- data.frame(coef(lm))
  fdr <- p.adjust(coefs$Pr...t.., method = "fdr", n = nrow(coefs))
  
  # Prepare the statistics table
  lm_stats <- bind_cols(coefs, fdr) %>%
    mutate(p = Pr...t..) %>%
    mutate(fdr = ...6) %>%
    select(-c(Pr...t.., ...6)) %>%
    rownames_to_column() %>%
    mutate(Months_after_ETI_start = rowname) %>%
    mutate(Months_after_ETI_start = case_when(
      Months_after_ETI_start == "(Intercept)" ~ "Baseline (Intercept)",
      Months_after_ETI_start == "visit_sum2" ~ "3",
      Months_after_ETI_start == "visit_sum3-5" ~ "6-12",
      Months_after_ETI_start == "visit_sum6-7" ~ "15-18",
      Months_after_ETI_start == "visit_sum8-10" ~ "21-24",
      Months_after_ETI_start == "visit_sumControl" ~ "Control",
      Months_after_ETI_start == "age_y" ~ "age_y",
      Months_after_ETI_start == "sex2" ~ "sex"
    )) %>%
    mutate(fdr_star = case_when(
      fdr <= 0.001 ~ "***",
      fdr <= 0.01 ~ "**",
      fdr <= 0.05 ~ "*",
      fdr <= 0.1 ~ ".",
      fdr > 0.1 ~ "ns"
    )) %>%
    mutate(p_star = case_when(
      p <= 0.001 ~ "***",
      p <= 0.01 ~ "**",
      p <= 0.05 ~ "*",
      p <= 0.1 ~ ".",
      p > 0.1 ~ "ns"
    )) %>%
    select(-rowname) %>%
    select(Months_after_ETI_start, Estimate, Std..Error, df, t.value, p, fdr, p_star, fdr_star) %>%
    mutate(p = round(p, 5)) %>%
    mutate(fdr = round(fdr, 5))
  
  lm_stats_print <- lm_stats %>% 
    mutate(alpha_diversity = ylabel) %>% 
    select(alpha_diversity, everything())

  # Adjust table for significance asterisks on the plot
  group1 <- c("(Intercept)", "1", "1", "1", "1", "1", "1", "1")
  
  lm_stats <- lm_stats %>%
    bind_cols(group1) %>%
    mutate(group1 = ...10) %>%
    mutate(group2 = case_when(
      Months_after_ETI_start == "Baseline (Intercept)" ~ "(Intercept)",
      Months_after_ETI_start == "3" ~ "2",
      Months_after_ETI_start == "6-12" ~ "3-5",
      Months_after_ETI_start == "15-18" ~ "6-7",
      Months_after_ETI_start == "21-24" ~ "8-10",
      Months_after_ETI_start == "Control" ~ "Control"
    )) %>%
    filter(group2 != "(Intercept)")

  # Create the boxplot
  boxplot <- ggplot(df, aes(visit_sum, x)) +
    geom_boxplot(aes(fill = visit_sum), outlier.shape = NA, alpha = 0.65) +
    geom_jitter(width = 0.1) +
    scale_fill_manual(values = visit_sum_palette) +
    theme_classic() +
    ylab(ylabel) +
    xlab("Months from ETI treatment start") +
    scale_x_discrete(labels = xlabels) +
    theme(
      text = element_text(size = 16), 
      legend.position = "none"
    ) +
    guides(fill = 'none') +
    stat_pvalue_manual(lm_stats, label = "fdr_star", y.position = (max(x, na.rm = TRUE) - 0.07 * max(x, na.rm = TRUE)), step.increase = 0.05)
  
  return(list(boxplot = boxplot, lm_stats = lm_stats_print))
}

# Run function on  data
axis1_throat <- lm_corrected_pvalues_boxplot(df$x1, "Axis 1")
## [1] "Axis 1"
## New names:
## • `` -> `...6`
## New names:
## • `` -> `...10`
axis1_throat_lm_stats <- axis1_throat$lm_stats # Access lm_stats
p3 <- axis1_throat$boxplot # Access boxplot

# Print the boxplot
print(p3)

axis2_throat <- lm_corrected_pvalues_boxplot(df$x2, "Axis 2")
## [1] "Axis 2"
## New names:
## New names:
## • `` -> `...6`
axis2_throat_lm_stats <- axis2_throat$lm_stats # Access lm_stats
p4 <- axis2_throat$boxplot # Access boxplot

# Print the boxplot
print(p4)

ggarrange(p3,p4)

# run statistics on controls vs all CF samples,
# in order to make this feasible in a linear model I have to revert the order of visit_sum
#
df$visit_sum_reorder <- factor(df$visit_sum, levels = c("Control", "1", "2", "3-5", "6-7", "8-10"))

lm_corrected_pvalues_boxplot_reorder <- function(x, ylabel) {
  
  print(ylabel) # x is the diversity parameter I am interested in
  lm <- summary(lmerTest::lmer(x~ visit_sum_reorder + age_y + sex + (1|id.x), data=df))
  
  coefs <- data.frame(coef(lm))
  fdr <- p.adjust(coefs$Pr...t.., method = "fdr", n = nrow(coefs))
  
  lm_stats <- bind_cols(coefs, fdr)%>%
    mutate(p = Pr...t..)%>%
    mutate(fdr = ...6)%>%
    select(-c(Pr...t.., ...6))%>%
    rownames_to_column()%>%
    mutate(Months_after_ETI_start=rowname)%>%
    mutate(Months_after_ETI_start= case_when(Months_after_ETI_start=="(Intercept)"~"Control (Intercept)", Months_after_ETI_start=="visit_sum_reorder2"~"3", Months_after_ETI_start=="visit_sum_reorder3-5"~"6-12",Months_after_ETI_start=="visit_sum_reorder6-7"~"15-18",Months_after_ETI_start=="visit_sum_reorder8-10"~ "21-24", Months_after_ETI_start=="visit_sum_reorder1"~"Baseline", Months_after_ETI_start=="age_y"~ "age_y", Months_after_ETI_start=="sex2"~ "sex"))%>%
    mutate(fdr_star = case_when(fdr<=0.001 ~ "***", fdr<=0.01 ~ "**", fdr<=0.05 ~ "*",fdr<=0.1 ~ ".",fdr>=0.1 ~ "ns"))%>%
    mutate(p_star = case_when(p<=0.001 ~ "***", p<=0.01 ~ "**", p<=0.05 ~ "*",p<=0.1 ~ ".", p>=0.1 ~ "ns"))%>%
    select(-rowname)%>%
    select(Months_after_ETI_start, Estimate, Std..Error, df, t.value, p, fdr, p_star, fdr_star)%>%
    mutate(p=round(p,5))%>%
    mutate(fdr=round(fdr, 5))
  
  lm_stats_print <- lm_stats %>% 
    mutate(alpha_diversity = ylabel) %>% 
    select(alpha_diversity, everything())

# to make the significant asterixes printable on the plot I have to adjust the table a little
group1 <- c("(Intercept)","Control","Control","Control","Control","Control","Control","Control")

lm_stats <-  lm_stats%>%
    bind_cols(group1)%>%
    mutate(group1=...10)%>%
    mutate(group2= case_when(Months_after_ETI_start=="Control (Intercept)"~"(Intercept)",
                             Months_after_ETI_start=="3"~"2",
                             Months_after_ETI_start=="Baseline"~"1", Months_after_ETI_start=="6-12" ~ "3-5", Months_after_ETI_start=="15-18"~"6-7",Months_after_ETI_start=="21-24"~"8-10"))%>%
    filter(group2!="(Intercept)")

  boxplot <- df%>%
    ggplot(aes(visit_sum, x))+
    geom_boxplot(aes(fill=visit_sum), outlier.shape = NA, alpha=0.65) +
    geom_jitter(width = 0.1)+
    scale_fill_manual(values=visit_sum_palette)+
    theme_classic()+
    ylab(ylabel)+
    xlab("Months from ETI treatment start")+
    scale_x_discrete(labels= xlabels)+
    theme(text=element_text(size = 16), legend.position = "none")+
    guides(fill='none') +
    stat_pvalue_manual(lm_stats, label = "fdr_star",y.position = (min(x)-0.07*min(x)), step.increase = 0.05)
  
    return(list(boxplot = boxplot, lm_stats = lm_stats_print, lm_stats_sig=lm_stats))
}

#Axis 1
axis1.2_throat <- lm_corrected_pvalues_boxplot_reorder(df$x1, "Axis 1")
## [1] "Axis 1"
## New names:
## New names:
## • `` -> `...6`
axis1_throat_lm_stats_reorder <- axis1.2_throat$lm_stats # Access lm_stats
axis1_throat_reorder <- axis1.2_throat$boxplot # Access boxplot
axis1_throat_reorder

axi1_wC_thr <- axis1_throat$boxplot +
  geom_signif(
    y_position = c(-0.6, -0.55, -0.5, -0.45),  
    xmin = c(2, 3, 4, 5), 
    xmax = c(6, 6, 6, 6),
    annotation = c(".", "*", "ns", "*"), 
    tip_length = -0.03, 
    vjust = 1,
    size = 0.35
  )
axi1_wC_thr

#Axis 2
axis2.2_thr <- lm_corrected_pvalues_boxplot_reorder(df$x2, "Axis 2")
## [1] "Axis 2"
## New names:
## New names:
## • `` -> `...6`
axis2_thr_lm_stats_reorder <- axis2.2_thr$lm_stats # Access lm_stats
axis2_thr_reorder <- axis2.2_thr$boxplot # Access boxplot
axis2_thr_reorder

axi2_wC_thr <- axis2_throat$boxplot +
  geom_signif(
    y_position = c(-0.6, -0.55, -0.5, -0.45), 
    xmin = c(2, 3, 4, 5), 
    xmax = c(6, 6, 6, 6),
    annotation = c("ns", "ns", "ns", "ns"), 
    tip_length = -0.03, 
    vjust = 1,
    size = 0.35
  )
axi2_wC_thr

# Arranging the plot

axis_throat <- ggarrange(axis1_den, axis2_den, axi1_wC_thr, axi2_wC_thr, 
          ncol = 2, nrow = 2,  align = "hv", 
          widths = c(1, 1),
          common.legend = F)
## Warning: Graphs cannot be vertically aligned unless the axis parameter is set.
## Placing graphs unaligned.
## Warning: Graphs cannot be horizontally aligned unless the axis parameter is
## set. Placing graphs unaligned.
axis_throat

3 PERMANOVA in throat

# calculate BC_distance
set.seed(100)
BC_dist<- phyloseq::distance(ps_full_throat_j,
                             method="bray", weighted=F)
#extract metadata
metadata<- as(sample_data(ps_full_throat_j),"data.frame")

### unstratified

adonis2(formula = BC_dist ~ visit_sum, data = metadata, permutations = 999, by = "margin")
adonis2(formula = BC_dist ~ visit_sum + sex + age_y, data = metadata, permutations = 999, by = "margin")
adonis2(formula = BC_dist ~ visit_sum + sex + age_y +id.x, data = metadata, permutations = 999, by = "margin") # sex and age are not significant when including id.x as a explantory variable
adonis2(formula = BC_dist ~ visit_sum +id.x, data = metadata, permutations = 999, by = "margin") # sex and age are not significant when including id.x as a explantory variable
### stratified PERMANOVA per id
adonis2(formula = BC_dist ~ visit_sum , data = metadata, permutations = 999, by = "margin", strata = metadata$id.x)
adonis2(formula = BC_dist ~ visit_sum + project , data = metadata, permutations = 999, by = "margin", strata = metadata$id.x)
### stratified PERMANOVA per project (health vs CF)
adonis2(formula = BC_dist ~ visit_sum + id.x, data = metadata, permutations = 999, by = "margin", strata = metadata$project)
### unstratified PERMANOVA health vs CF
adonis2(formula = BC_dist ~ project, data = metadata, permutations = 999, by = "margin")
adonis2(formula = BC_dist ~ project + id.x, data = metadata, permutations = 999, by = "margin")
adonis2(formula = BC_dist ~ project + visit_sum, data = metadata, permutations = 999, by = "margin")
### stratified PERMANOVA health vs CF
adonis2(formula = BC_dist ~ project + visit_sum + sex + age_y, data = metadata, permutations = 999, by = "margin", strata = metadata$id.x)
adonis2(formula = BC_dist ~ project + id.x, data = metadata, permutations = 999, by = "margin", strata = metadata$visit_sum)
adonis2(formula = BC_dist ~ visit_sum + id.x, data = metadata, permutations = 999, by = "margin", strata = metadata$project)

4 PERMANOVA per time point comparison

# check baseline vs follow-up samples
metadata<- metadata %>% 
  mutate(ETI_yn = as_factor(case_when(visit_sum=="1"~"no", visit_sum=="Control"~"Control", T~"yes")))

summary(metadata$ETI_yn)
##     yes      no Control 
##     192      21      39
### plot principal component analysis by visit_sum
PCA_all_visit_throat_dominantGenus <- plot_ordination(ps_full_throat_j, ordinate(ps_full_throat_j, "MDS"), color = "visit_sum", shape="project")

project_label <- c("Control","CF")

labels <- c("Baseline", "ETI", "ETI", "ETI", "ETI", "Control")

bfu_palette <- c("black", "#55AD89FF","#55AD89FF","#55AD89FF","#55AD89FF","#8176AA" )

PCA_all_visit_throat_dominantGenus +
  geom_point(size = 4, alpha = 0.7) +
  scale_color_manual(values = bfu_palette, labels = xlabels) +
  scale_shape_manual(values = c(15, 16), labels = project_label) +
  theme_classic() +
  theme(text = element_text(size = 18), legend.position = "none", legend.text = element_text(size = 16), legend.title = element_blank()) +
  guides(color = guide_legend(title = "Months from treatment start", title.position = "left", ncol = 2))+
  stat_ellipse()

### stratified PERMANOVA control vs baseline vs follow-up
adonis2(formula = BC_dist ~ ETI_yn, data = metadata, permutations = 999, by = "margin", strata = metadata$id.x)
# Repeat PERMANOVA for separate time points, paired with baseline

metadata_v1 <- metadata %>%
  filter(visit_sum %in% c("1", "Control"))
# Extract relevant sample IDs
sample_ids <- metadata_v1$x_sample_id

# Filter the distance matrix to only include relevant rows and cols
BC_df <- as.matrix(BC_dist)
BC_df <- as.data.frame(BC_df)

# Filter rows based on sample IDs
df_v1 <- BC_df[rownames(BC_df) %in% sample_ids, ]
# Filter columns based on sample IDs
df_v1 <- df_v1[, colnames(df_v1) %in% sample_ids]

# Convert to matrix
BC_v1 <- as.matrix(df_v1)

### unstratified
adonis2(formula = BC_v1 ~ visit_sum, data = metadata_v1, permutations = 999, by = "margin")
# as there are no repeated samples no need for stratification
adonis2(formula = BC_v1 ~ project, data = metadata_v1, permutations = 999, by = "margin")
# now Control vs all follow-ups together (without baseline)

metadata_fu <- metadata %>%
  filter(ETI_yn %in% c("yes", "Control"))
# Extract relevant sample IDs
sample_ids <- metadata_fu$x_sample_id
# Filter the distance matrix to only include relevant rows and cols
BC_df <- as.matrix(BC_dist)
BC_df <- as.data.frame(BC_df)

# Filter rows based on sample IDs
df_fu <- BC_df[rownames(BC_df) %in% sample_ids, ]
# Filter columns based on sample IDs
df_fu <- df_fu[, colnames(df_fu) %in% sample_ids]

# Convert to matrix
BC_fu <- as.matrix(df_fu)

### stratified
adonis2(formula = BC_fu ~ visit_sum, data = metadata_fu, permutations = 999, by = "margin", strata = metadata_fu$id.x)
adonis2(formula = BC_fu ~ ETI_yn, data = metadata_fu, permutations = 999, by = "margin", strata = metadata_fu$id.x)
# now Control vs follow-ups separately
metadata_v2 <- metadata %>%
  filter(visit_sum %in% c("2", "Control"))
# Extract relevant sample IDs
sample_ids <- metadata_v2$x_sample_id

# Filter rows based on sample IDs
df_v2 <- BC_df[rownames(BC_df) %in% sample_ids, ]
# Filter columns based on sample IDs
df_v2 <- df_v2[, colnames(df_v2) %in% sample_ids]

# Convert to matrix
BC_v2 <- as.matrix(df_v2)

### unstratified
adonis2(formula = BC_v2 ~ visit_sum, data = metadata_v2, permutations = 999, by = "margin")
# as there are no repeated samples no need for stratification - it also gives an error
adonis2(formula = BC_v2 ~ project, data = metadata_v2, permutations = 999, by = "margin")
metadata_v3 <- metadata %>%
  filter(visit_sum %in% c("3-5", "Control"))
# Extract relevant sample IDs
sample_ids <- metadata_v3$x_sample_id

# Filter rows based on sample IDs
df_v3 <- BC_df[rownames(BC_df) %in% sample_ids, ]
# Filter columns based on sample IDs
df_v3 <- df_v3[, colnames(df_v3) %in% sample_ids]

# Convert to matrix
BC_v3 <- as.matrix(df_v3)

### unstratified
adonis2(formula = BC_v3 ~ visit_sum, data = metadata_v3, permutations = 999, by = "margin")
adonis2(formula = BC_v3 ~ project, data = metadata_v3, permutations = 999, by = "margin")
# as there are repeated samples, I will block id to verify this observation
adonis2(formula = BC_v3 ~ visit_sum, data = metadata_v3, permutations = 999, by = "margin", strata = metadata_v3$id.x)
metadata_v6 <- metadata %>%
  filter(visit_sum %in% c("6-7", "Control"))
# Extract relevant sample IDs
sample_ids <- metadata_v6$x_sample_id

# Filter rows based on sample IDs
df_v6 <- BC_df[rownames(BC_df) %in% sample_ids, ]
# Filter columns based on sample IDs
df_v6 <- df_v6[, colnames(df_v6) %in% sample_ids]

# Convert to matrix
BC_v6 <- as.matrix(df_v6)

### unstratified
adonis2(formula = BC_v6 ~ visit_sum, data = metadata_v6, permutations = 999, by = "margin")
adonis2(formula = BC_v6 ~ project, data = metadata_v6, permutations = 999, by = "margin")
# as there are repeated samples, I will block id to verify this observation
adonis2(formula = BC_v6 ~ visit_sum, data = metadata_v6, permutations = 999, by = "margin", strata = metadata_v6$id.x)
metadata_v8 <- metadata %>%
  filter(visit_sum %in% c("8-10", "Control"))
# Extract relevant sample IDs
sample_ids <- metadata_v8$x_sample_id

# Filter rows based on sample IDs
df_v8 <- BC_df[rownames(BC_df) %in% sample_ids, ]
# Filter columns based on sample IDs
df_v8 <- df_v8[, colnames(df_v8) %in% sample_ids]

# Convert to matrix
BC_v8 <- as.matrix(df_v8)

### unstratified
adonis2(formula = BC_v8 ~ visit_sum, data = metadata_v8, permutations = 999, by = "margin")
adonis2(formula = BC_v8 ~ project, data = metadata_v8, permutations = 999, by = "margin")
# as there are repeated samples, I will block id to verify this observation
adonis2(formula = BC_v8 ~ visit_sum, data = metadata_v8, permutations = 999, by = "margin", strata = metadata_v8$id.x)

5 PERMANOVA on CF-health comparisons alone

# I reduce the distance matrix to only have CF samples in the rows, and only healthy samples in the columns, so that only CF to healthy comparisons are retained

# calculate BC_distance
set.seed(100)
BC_dist<- phyloseq::distance(ps_full_throat_j,
                             method="bray", weighted=F)

#extract metadata
metadata<- as(sample_data(ps_full_throat_j),"data.frame")

# Use grep to find columns that contain "IMMP" in their names
cols_to_keep <- metadata %>% 
  filter(project=="IMMPIMMP") %>% 
  select(x_sample_id)
cols_to_keep <- cols_to_keep$x_sample_id

rows_to_keep <- metadata %>% 
  filter(project=="IMP") %>% 
  select(x_sample_id)
rows_to_keep <- rows_to_keep$x_sample_id
# Convert the distance object to a matrix
BC_matrix <- as.matrix(BC_dist)

# Subset the matrix to keep only those columns that are from controls
BC_matrix_f <- BC_matrix[, cols_to_keep]
BC_matrix_f <- BC_matrix[rows_to_keep, ]

# no keep in the metadata only CF samples
metadata <- metadata %>% 
  filter(project=="IMP")

### unstratified
adonis2(formula = BC_matrix_f ~ visit_sum, data = metadata, permutations = 999, by = "margin")

adonis2(formula = BC_matrix_f ~ visit_sum + id.x, data = metadata, permutations = 999, by = "margin")

### stratified PERMANOVA per id
adonis2(formula = BC_matrix_f ~ visit_sum, data = metadata, permutations = 999, by = "margin", strata = metadata$id.x)

# check baseline vs follow-up samples
metadata<- metadata %>% 
  mutate(ETI_yn = case_when(visit_sum=="1"~FALSE, T~TRUE))

### unstratified
adonis2(formula = BC_matrix_f ~ ETI_yn, data = metadata, permutations = 999, by = "margin")

adonis2(formula = BC_matrix_f ~ ETI_yn + id.x, data = metadata, permutations = 999, by = "margin")

### stratified PERMANOVA per id
adonis2(formula = BC_matrix_f ~ ETI_yn, data = metadata, permutations = 999, by = "margin", strata = metadata$id.x)

# Repeat PERMANOVA for separate time points, paired with baseline

md_v1v2 <- metadata %>%
  filter(visit_sum %in% c("1", "2"))

# Filter the distance matrix to only include relevant rows
BC_matrix_f <- as.data.frame(BC_matrix_f)
BC_v1v2 <- BC_matrix_f %>% 
  filter(rownames(BC_matrix_f) %in% c(md_v1v2$x_sample_id))

# Convert to matrix
BC_v1v2 <- as.matrix(BC_v1v2)

# Run PERMANOVA for specific time points
adonis2(formula = BC_v1v2 ~ visit_sum , data = md_v1v2, permutations = 999, by = "margin")
adonis2(formula = BC_v1v2 ~ visit_sum + id.x, data = md_v1v2, permutations = 999, by = "margin")

# Run PERMANOVA for visit_sum, controlling for id
adonis2(formula = BC_v1v2 ~ visit_sum, data = md_v1v2, permutations = 999, by = "margin", strata = md_v1v2$id.x)

# 6-12 months + baseline
md_v1v35 <- metadata %>%
  filter(visit_sum %in% c("1", "3-5"))

# Filter the distance matrix to only include relevant rows
BC_v35 <- BC_matrix_f %>% 
  filter(rownames(BC_matrix_f) %in% c(md_v1v35$x_sample_id))

# Convert to matrix
BC_v35 <- as.matrix(BC_v35)

# Run PERMANOVA for specific time points
adonis2(formula = BC_v35 ~ visit_sum , data = md_v1v35, permutations = 999, by = "margin")
adonis2(formula = BC_v35 ~ visit_sum + id.x, data = md_v1v35, permutations = 999, by = "margin")

# Run PERMANOVA for visit_sum, controlling for id
adonis2(formula = BC_v35 ~ visit_sum, data = md_v1v35, permutations = 999, by = "margin", strata = md_v1v35$id.x)

6 compare CF and control BC-distances for throat

# calculate BC_distance
set.seed(100)
BC_dist<- phyloseq::distance(ps_full_throat_j,
                             method="bray", weighted=F)
BC_dist.throat <- as.matrix(BC_dist)
#BC_dist.throat[lower.tri(BC_dist.throat)] <- 0  # this is needed to remove the 0 latter, to keep only single pairs
BC_dist.throat_df <- reshape2::melt(BC_dist.throat)

tmp1 <- BC_dist.throat_df%>%
  filter(grepl("IMMP", Var1)) %>% #this keeps only control samples in Var 1
  filter(!grepl("IMMP", Var2))#%>%  #this filters control samples from Var 2, so that in the end only distances between COntrols and all CF samples are kept
  #filter(value!=0)#this removes the same sample distances, and the one from set 0 from above to keep a single pair

# merge with metadata to have visit_sums for CF samples
#extract metadata
metadata<- as(sample_data(ps_full_throat_j),"data.frame")
tmp1 <- tmp1 %>% 
  left_join(metadata, by=c("Var2"="x_sample_id")) %>% 
  mutate(comparison=paste(Var1, Var2, sep = "_")) %>% 
  distinct(comparison, .keep_all = T)

summary(tmp1$visit_sum)
##       1       2     3-5     6-7    8-10 Control 
##     819    1131    3120    1677    1560       0
my_comp <- list(c("1","2"), c("1","3-5"), c("1","6-7"), c("1","8-10"))

# Calculate median and IQR
summary_stats <- tmp1 %>%
  group_by(visit_sum) %>%
  summarize(
    median = median(value),
    lower = quantile(value, 0.25),
    upper = quantile(value, 0.75)
  )

tmp1 %>% 
  ggplot(aes(visit_sum, value, fill=visit_sum))+
  geom_boxplot(outlier.shape = NA, alpha=0.7)+
  #geom_point()+
  theme_classic()+
  scale_fill_manual(values = visit_sum_palette)+
  ylab("BC distance between CF and Controls")+
  theme(legend.position = "none",text=element_text(size=20))+ #
  scale_x_discrete(labels= xlabels)+
  xlab("")+
  stat_compare_means(comparisons = my_comp, method = "wilcox.test", label = "p.adj")

ggsave("/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/figures/BC_throat_controls.png", width = 8, height = 6)

summary(lmerTest::lmer(value~visit_sum + (1|Var1)+ (1|id.x), data = tmp1))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: value ~ visit_sum + (1 | Var1) + (1 | id.x)
##    Data: tmp1
## 
## REML criterion at convergence: -14544.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.3008 -0.6450  0.0106  0.6368  3.6986 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Var1     (Intercept) 0.003130 0.05595 
##  id.x     (Intercept) 0.004097 0.06401 
##  Residual             0.009735 0.09867 
## Number of obs: 8307, groups:  Var1, 39; id.x, 35
## 
## Fixed effects:
##                 Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)    6.889e-01  1.451e-02  7.477e+01  47.497  < 2e-16 ***
## visit_sum2    -4.180e-02  4.633e-03  8.239e+03  -9.023  < 2e-16 ***
## visit_sum3-5  -2.638e-02  4.155e-03  8.263e+03  -6.350 2.27e-10 ***
## visit_sum6-7  -3.761e-02  4.613e-03  8.264e+03  -8.154 4.02e-16 ***
## visit_sum8-10 -1.613e-02  4.681e-03  8.264e+03  -3.445 0.000574 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) vst_s2 vs_3-5 vs_6-7
## visit_sum2  -0.190                     
## visit_sm3-5 -0.221  0.675              
## visit_sm6-7 -0.204  0.622  0.760       
## vist_sm8-10 -0.202  0.620  0.753  0.718
# below I plot the BC distance per patient and each control, with controls being on the x-axis, one can observe the consistency of BC distance per patient across controls, with visit_sum giving a distinct signal in each patient
tmp1 %>% 
  ggplot(aes(Var1, value, fill=visit_sum))+
  #geom_boxplot(outlier.shape = NA)+
  geom_point(aes(color=visit_sum), alpha=0.5)+
  theme_classic()+
  scale_color_manual(values = visit_sum_palette)+
  ylab("BC distance between CF and Controls")+
  theme(legend.position = "none",text=element_text(size=20))+ #
  #scale_x_discrete(labels= xlabels)+
  xlab("")+
  facet_wrap(~id.x)

7 throat linear model + boxplot

lm_boxplot_function <- function(dependent_variable, ylabel) {

  # Fit linear mixed-effects model
  print(summary(lmerTest::lmer(paste(dependent_variable, "~ visit_sum + sex + age_y + (1|id) + (1|Var1)"), data = tmp1)))
  lm <- summary(lmerTest::lmer(paste(dependent_variable, "~ visit_sum + sex + age_y + (1|id) + (1|Var1)"), data = tmp1))
  #lm <- summary(lmerTest::lmer(value ~ visit_sum + sex + age_y + (1|id), data = tmp1))
  # Extract coefficients and adjust p-values
  coefs <- data.frame(coef(lm))
  fdr <- p.adjust(coefs$Pr...t.., method = "fdr", n = nrow(coefs))
  
  # Create lm_stats table
  lm_stats <- bind_cols(coefs, fdr) %>%
    mutate(p = Pr...t..) %>%
    mutate(fdr = ...6) %>%
    select(-c(Pr...t.., ...6)) %>%
    rownames_to_column() %>%
    mutate(Months_after_ETI_start = rowname) %>%
    mutate(Months_after_ETI_start = case_when(
      Months_after_ETI_start == "(Intercept)" ~ "Baseline (Intercept)",
      Months_after_ETI_start == "visit_sum2" ~ "3 months",
      Months_after_ETI_start == "visit_sum3-5" ~ "6-12 months",
      Months_after_ETI_start == "visit_sum6-7" ~ "15-18 months",
      Months_after_ETI_start == "visit_sum8-10" ~ "21-24 months",
      Months_after_ETI_start %in% c("sex2", "age_y") ~ Months_after_ETI_start
    )) %>%
    mutate(fdr_star = case_when(
      fdr <= 0.001 ~ "***",
      fdr <= 0.01 ~ "**",
      fdr <= 0.05 ~ "*",
      fdr <= 0.1 ~ ".",
      fdr >= 0.1 ~ "ns"
    )) %>%
    select(-rowname) %>%
    select(Months_after_ETI_start, Estimate, Std..Error, df, t.value, p, fdr, fdr_star) %>%
    mutate(p = round(p, 5)) %>%
    mutate(fdr = round(fdr, 5))
  
  # Print lm_stats table
  print(lm_stats)
  
  # Save lm_stats table as HTML
  sjPlot::tab_df(lm_stats, title = paste("lmer(", dependent_variable, " ~ visit_sum + sex + age_y + (1|id), data=tmp1)"), file = paste("/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/table_outputs/", dependent_variable, "_table.html", sep = ""))
  
  # Print boxplot
  group1 <- c("(Intercept)", rep("1", nrow(lm_stats) - 1))
  
  lm_sig<- lm_stats %>%
    bind_cols(group1) %>%
    mutate(group1 = ...9) %>%
    mutate(group2 = case_when(
      Months_after_ETI_start == "Baseline (Intercept)" ~ "1",
      Months_after_ETI_start == "3 months" ~ "2",
      Months_after_ETI_start == "6-12 months" ~ "3-5",
      Months_after_ETI_start == "15-18 months" ~ "6-7",
      Months_after_ETI_start == "21-24 months" ~ "8-10",
      Months_after_ETI_start %in% c("sex2", "age_y") ~ Months_after_ETI_start
    )) %>%
    filter(group1 != "(Intercept)") %>%
    filter(Months_after_ETI_start!="sex2") %>% 
    filter(Months_after_ETI_start!="age_y")
  
  max_y <- max(tmp1[[dependent_variable]], na.rm = TRUE)
  min_y <- min(tmp1[[dependent_variable]], na.rm = TRUE)
  
 # Calculate the adjusted y.position for stat labels
stat_labels_y_position <- max_y + 0.1 * (max_y - min_y)

  xlabels <- c("0", "3", "6-12", "15-18", "21-24")
  
# Print boxplot
boxplot <- tmp1 %>%
  ggplot(aes(get(dependent_variable), x = visit_sum)) +
  geom_boxplot(aes(fill = visit_sum), outlier.shape = NA, alpha = 0.7) +
  scale_fill_manual(values = visit_sum_palette) +
  theme_classic() +
  ylab(ylabel) +
  xlab("Months from ETI treatment start") +
  scale_x_discrete(labels = xlabels) +
  theme(text = element_text(size = 18), legend.position = "none") +
   stat_pvalue_manual(lm_sig, label = "fdr_star", y.position = stat_labels_y_position, step.increase = 0.05)

return(list(lm_stats = lm_stats, boxplot = boxplot))
}

throat <- lm_boxplot_function("value", "Throat: BC distance - CF & Control")
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## paste(dependent_variable, "~ visit_sum + sex + age_y + (1|id) + (1|Var1)")
##    Data: tmp1
## 
## REML criterion at convergence: -14534.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.2896 -0.6446  0.0092  0.6347  3.7029 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Var1     (Intercept) 0.003130 0.05595 
##  id       (Intercept) 0.003788 0.06155 
##  Residual             0.009731 0.09864 
## Number of obs: 8307, groups:  Var1, 39; id, 35
## 
## Fixed effects:
##                 Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)    6.296e-01  2.462e-02  4.433e+01  25.575  < 2e-16 ***
## visit_sum2    -4.221e-02  4.634e-03  8.252e+03  -9.109  < 2e-16 ***
## visit_sum3-5  -2.777e-02  4.191e-03  7.721e+03  -6.625 3.70e-11 ***
## visit_sum6-7  -4.039e-02  4.746e-03  5.152e+03  -8.510  < 2e-16 ***
## visit_sum8-10 -1.978e-02  4.910e-03  3.026e+03  -4.029 5.74e-05 ***
## sex2           2.901e-02  2.111e-02  3.049e+01   1.374   0.1795    
## age_y          1.801e-03  7.411e-04  3.462e+01   2.431   0.0204 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) vst_s2 vs_3-5 vs_6-7 v_8-10 sex2  
## visit_sum2  -0.086                                   
## visit_sm3-5 -0.035  0.673                            
## visit_sm6-7  0.050  0.612  0.763                     
## vist_sm8-10  0.098  0.601  0.751  0.736              
## sex2        -0.351  0.001  0.012  0.022  0.031       
## age_y       -0.691 -0.035 -0.133 -0.237 -0.303 -0.119
## New names:
## • `` -> `...6`
##   Months_after_ETI_start     Estimate   Std..Error         df   t.value       p
## 1   Baseline (Intercept)  0.629587955 0.0246168850   44.33130 25.575452 0.00000
## 2               3 months -0.042214097 0.0046345369 8252.29314 -9.108590 0.00000
## 3            6-12 months -0.027768307 0.0041912536 7721.41521 -6.625299 0.00000
## 4           15-18 months -0.040386983 0.0047459917 5152.04960 -8.509704 0.00000
## 5           21-24 months -0.019781584 0.0049098627 3025.76258 -4.028949 0.00006
## 6                   sex2  0.029005377 0.0211132808   30.49098  1.373798 0.17952
## 7                  age_y  0.001801253 0.0007410551   34.61843  2.430660 0.02040
##       fdr fdr_star
## 1 0.00000      ***
## 2 0.00000      ***
## 3 0.00000      ***
## 4 0.00000      ***
## 5 0.00008      ***
## 6 0.17952       ns
## 7 0.02380        *
## New names:
## • `` -> `...9`
p2 <- throat$boxplot
p2

throat$lm_stats
library(coefplot)
## Warning: package 'coefplot' was built under R version 4.1.2
# Fit your linear mixed-effects model
# For example, replace 'YourModel' and 'YourData' with your model and data, id is the CF patient id, and Var1 encodes the control id, by controlling for the pairs between CF patients and controls, implosion is controlled
model <-  lmerTest::lmer(value ~ visit_sum + sex + age_y + (1|id) + (1|Var1), data = tmp1)

# Extract coefficients
coefficients <- coef(model)

# Plot coefficients
  coef_p <- coefplot::coefplot(model, intercept = FALSE, color = "black")
  coef_p_t <- coef_p+
    theme_classic()+
    scale_y_discrete(labels= c("3 m", "6-12 m", "15-18 m", "21-24 m", "sex", "age"))+
    coord_flip()+
    xlab("Estimate")+
    ggtitle("")+
    ylab("")+
    theme(text = element_text(size = 18))
  coef_p_t

ggarrange(p1,p2, widths = c(2:1))

#ggsave("/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/figures/BetaandBC_throat_controls.png", width = 13, height = 5.5)

8 PERMANOVA on CF-Control distances in throat

# from tmp1 , the dataframe where I have all CF-Control distances listed, first I create a id for control samples

set.seed(123)

df_pm <- tmp1 %>%
  mutate(id_c = str_extract_all(comparison, "(?<=K|O)\\d+")) %>% 
  mutate(id_c = as_factor(sapply(id_c, function(x) as.integer(x[1]))))

# Select relevant columns
df <- df_pm %>%
  select(Var1, Var2, value)

df_s <- df_pm %>%
  select(Var1, Var2, value, visit_sum, id.y, id_c)

# Pivot data to wide format
df_wide <- df %>%
  pivot_wider(names_from = Var1, values_from = value)

# Set row names and remove the column used for row names
rownames(df_wide) <- df_wide$Var2
## Warning: Setting row names on a tibble is deprecated.
df_matrix <- df_wide %>% 
  select(-Var2)

# Convert to matrix
BC <- as.matrix(df_matrix)

# Ensure distinct rows for running adonis2
df_pm_d <- df_pm %>%
  distinct(Var2, .keep_all = TRUE)

# Run PERMANOVA for overall effect
adonis2(formula = BC ~ visit_sum + id.y, data = df_pm_d, permutations = 999, by = "margin")
# Run PERMANOVA for visit_sum, controlling for id.y
adonis2(formula = BC ~ visit_sum, data = df_pm_d, permutations = 999, by = "margin", strata = df_pm_d$id.y)
# Repeat PERMANOVA for separate time points, paired with baseline

df_pm_d_v1v2 <- df_pm_d %>%
  filter(visit_sum %in% c("1", "2"))

# Filter the distance matrix to only include relevant rows
df_v1v2 <- df_wide %>% 
  filter(Var2 %in% c(df_pm_d_v1v2$Var2))
df_v1v2$Var2<- NULL
# Convert to matrix
BC_v1v2 <- as.matrix(df_v1v2)

# Run PERMANOVA for specific time points
adonis2(formula = BC_v1v2 ~ visit_sum + id.y, data = df_pm_d_v1v2, permutations = 999, by = "margin")
# Run PERMANOVA for visit_sum, controlling for id.y
adonis2(formula = BC_v1v2 ~ visit_sum, data = df_pm_d_v1v2, permutations = 999, by = "margin", strata = df_pm_d_v1v2$id.y)
# visits 6-12 months
df_pm_d_v1v35 <- df_pm_d %>%
  filter(visit_sum %in% c("1", "3-5"))

# Filter the distance matrix to only include relevant rows
df_v1v35 <- df_wide %>% 
  filter(Var2 %in% c(df_pm_d_v1v35$Var2))
df_v1v35$Var2<- NULL
# Convert to matrix
BC_v1v35 <- as.matrix(df_v1v35)

# Run PERMANOVA for specific time points
adonis2(formula = BC_v1v35 ~ visit_sum + id.y, data = df_pm_d_v1v35, permutations = 999, by = "margin")
# Run PERMANOVA for visit_sum, controlling for id.y
adonis2(formula = BC_v1v35 ~ visit_sum, data = df_pm_d_v1v35, permutations = 999, by = "margin", strata = df_pm_d_v1v35$id.y)
# visits 15-18 months
df_pm_d_v1v67 <- df_pm_d %>%
  filter(visit_sum %in% c("1", "6-7"))

# Filter the distance matrix to only include relevant rows
df_v1v67 <- df_wide %>% 
  filter(Var2 %in% c(df_pm_d_v1v67$Var2))
df_v1v67$Var2<- NULL
# Convert to matrix
BC_v1v67 <- as.matrix(df_v1v67)

# Run PERMANOVA for specific time points
adonis2(formula = BC_v1v67 ~ visit_sum + id.y, data = df_pm_d_v1v67, permutations = 999, by = "margin")
# Run PERMANOVA for visit_sum, controlling for id.y
adonis2(formula = BC_v1v67 ~ visit_sum, data = df_pm_d_v1v67, permutations = 999, by = "margin", strata = df_pm_d_v1v67$id.y)
# visits 21-24 months
df_pm_d_v1v89 <- df_pm_d %>%
  filter(visit_sum %in% c("1", "8-10"))

# Filter the distance matrix to only include relevant rows
df_v1v89 <- df_wide %>% 
  filter(Var2 %in% c(df_pm_d_v1v89$Var2))
df_v1v89$Var2<- NULL
# Convert to matrix
BC_v1v89 <- as.matrix(df_v1v89)

# Run PERMANOVA for specific time points
adonis2(formula = BC_v1v89 ~ visit_sum + id.y, data = df_pm_d_v1v89, permutations = 999, by = "margin")
# Run PERMANOVA for visit_sum, controlling for id.y
adonis2(formula = BC_v1v89 ~ visit_sum, data = df_pm_d_v1v89, permutations = 999, by = "margin", strata = df_pm_d_v1v89$id.y)

9 Analysis on most abundant taxa per patient in stool

#Get top taxa per patient
#find.top.taxa2 is sourced from functions.R
top.stool<- find.top.taxa2(ps_full_stool_glom, "Genus",1)
top.stool$Species<- NULL

rslt <- top.stool[, "taxa"]
dd <- matrix(unlist(rslt), nrow=1)
colnames(dd) <- rownames(top.stool)
top.stool <- t(dd)

top.stool_df <- data.frame(x1 = row.names(top.stool), top.stool)%>%
  mutate(dominantGenus = top.stool)
top.stool_df$top.stool<- NULL

# add clinical data to p

##Add dominant Genus to ps_full_stool_glom sample data
ps_full_stool_glom_j <- microViz::ps_join(ps_full_stool_glom, top.stool_df, by = "x1")

##Add dominant Genus to ps_full_stool_glom sample data
ps_full_stool_j <- microViz::ps_join(ps_full_stool, top.stool_df, by = "x1")

### plot principal component analysis by visit_sum
PCA_all_visit_stool_dominantGenus <- plot_ordination(ps_full_stool_j, ordinate(ps_full_stool_j, "MDS"), color = "visit_sum", shape="project")

project_label <- c("Control","CF")

p5 <- PCA_all_visit_stool_dominantGenus +
  geom_point(size = 4, alpha = 0.7) +
  scale_fill_manual(values = visit_sum_palette, labels = xlabels) +
  scale_color_manual(values = visit_sum_palette, labels = xlabels) +
  scale_shape_manual(values = c(15, 16), labels = project_label) +
  theme_classic() +
  theme(text = element_text(size = 18), legend.position = "none", legend.text = element_text(size = 16), legend.title = element_blank()) +
  stat_ellipse()
p5

#ggsave("/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/figures/beta_stool_controls.png", width = 10, height = 6)

10 create density plots for axis 1 and 2

# Perform PCoA
ord_obj <- ordinate(ps_full_stool_j, "MDS")

# Extract coordinates from PCoA
pcoa_coords <- ord_obj$vectors

# Create a data frame for ggplot
df <- data.frame(x1 = pcoa_coords[, 1], x2 = pcoa_coords[, 2], visit_sum = sample_data(ps_full_stool_j)$visit_sum, age_y=sample_data(ps_full_stool_j)$age_y, sex=sample_data(ps_full_stool_j)$sex, id.x=sample_data(ps_full_stool_j)$id.x)

# Create a named vector for custom labels
custom_labels <- c("1" = "0", 
                   "2" = "3", 
                   "3-5" = "6-12", 
                   "6-7" = "15-18", 
                   "8-10" = "21-24", 
                   "Control" = "Control")

# Create density plots on the x and y axes
plot_x_st <- ggplot(df, aes(x = x1, fill = as.factor(visit_sum))) +
  geom_density(alpha = 0.5) +
  theme_classic() +
  #clean_theme()+
  scale_x_continuous(breaks = c(-0.3, 0, 0.3), labels = c("-0.3", "0", "0.3"))+
  scale_fill_manual(values = visit_sum_palette, labels = xlabels)+
  theme(legend.position = "none", text = element_text(size = 18), legend.title = element_blank())

# Define the common y-axis limits
y_axis_limits <- c(0, 6.5)  # Adjust these values as needed

# Density plot with fixed y-axis range
axis1_den <- ggplot(df, aes(x = x1, fill = as.factor(visit_sum))) +
  geom_density(alpha = 0.8) +
  theme_classic() +
  scale_x_continuous(breaks = c(-0.3, 0, 0.3), labels = c("-0.3", "0", "0.3")) +
    scale_fill_manual(values = visit_sum_palette, labels = xlabels) +
  theme(
    legend.position = "none",
    text = element_text(size = 18),
    legend.title = element_blank(),
    strip.text.y = element_text(angle = 0)  # Rotate the facet labels
  ) +
  facet_grid(rows = vars(visit_sum), labeller =  as_labeller(custom_labels)) +
  ylim(y_axis_limits)+
  labs( x = "Axis 1")
axis1_den

# Boxplot comparing x between the 5 groups
axis1_bp <- ggplot(df, aes(x = as.factor(visit_sum), y = x1, fill = as.factor(visit_sum))) +
  geom_boxplot(alpha=0.8) +
  theme_classic() +
  scale_x_discrete(labels = xlabels) +
  scale_fill_manual(values = visit_sum_palette, labels = xlabels) +
  theme(
    legend.position = "none",
    text = element_text(size = 18),
    legend.title = element_blank()
  ) +
  labs(x = "Months from tt start", y = "Axis 1")
axis1_bp

plot_y_st <- ggplot(df, aes(x = x2, fill = as.factor(visit_sum))) +
  geom_density(alpha = 0.5) +
  theme_classic() +
  #clean_theme()+
  xlim(c(-0.6, 0.4))+
  scale_fill_manual(values = visit_sum_palette, labels = xlabels)+
  theme(legend.position = "none",text = element_text(size = 18), legend.title = element_blank())+
  coord_flip()

# Define the common y-axis limits
y_axis_limits <- c(0, 5)  # Adjust these values as needed

# Density plot with fixed y-axis range
axis2_den <- ggplot(df, aes(x = x2, fill = as.factor(visit_sum))) +
  geom_density(alpha = 0.8) +
  theme_classic() +
  scale_x_continuous(breaks = c(-0.6, 0, 0.4), labels = c("-0.6", "0", "0.4")) +
  scale_fill_manual(values = visit_sum_palette, labels = xlabels) +
  theme(
    legend.position = "none",
    text = element_text(size = 18),
    legend.title = element_blank(),
    strip.text.y = element_text(angle = 0)  # Rotate the facet labels
  ) +
  facet_grid(rows = vars(visit_sum), labeller =  as_labeller(custom_labels)) +
  ylim(y_axis_limits)+
  labs(x = "Axis 2")
axis2_den

# Boxplot comparing x between the 5 groups
axis2_bp <- ggplot(df, aes(x = as.factor(visit_sum), y = x2, fill = as.factor(visit_sum))) +
  geom_boxplot(alpha=0.8) +
  theme_classic() +
  scale_x_discrete(labels = xlabels) +
  scale_fill_manual(values = visit_sum_palette, labels = xlabels) +
  theme(
    legend.position = "none",
    text = element_text(size = 18),
    legend.title = element_blank(),
  ) +
  labs(x = "Months from tt start", y = "Axis 2")
axis2_bp

# Extract the legend. Returns a gtable
leg <- get_legend(p1_leg)

# Convert to a ggplot and print
legend <- as_ggplot(leg)

# Arranging the plot

axis <- ggarrange(axis1_den, axis2_den, axis1_bp, axis2_bp, 
          ncol = 2, nrow = 2,  align = "hv", 
          widths = c(1, 1),
          common.legend = F)
## Warning: Graphs cannot be vertically aligned unless the axis parameter is set.
## Placing graphs unaligned.
## Warning: Graphs cannot be horizontally aligned unless the axis parameter is
## set. Placing graphs unaligned.
axis

stool_density <- ggarrange(plot_x_th, NULL, p1, plot_y_th, 
          ncol = 2, nrow = 2,  align = "hv", 
          widths = c(2, 1), heights = c(1, 2),
          common.legend = F)
stool_density

11 add linear models on axis boxplots

# Load required libraries
# Run function on  data
axis1 <- lm_corrected_pvalues_boxplot(df$x1, "Axis 1")
## [1] "Axis 1"
## New names:
## New names:
## • `` -> `...6`
axis1_lm_stats <- axis1$lm_stats # Access lm_stats
p3 <- axis1$boxplot # Access boxplot

# Print the boxplot
print(p3)

axis2 <- lm_corrected_pvalues_boxplot(df$x2, "Axis 2")
## [1] "Axis 2"
## New names:
## New names:
## • `` -> `...6`
axis2_lm_stats <- axis2$lm_stats # Access lm_stats
p4 <- axis2$boxplot # Access boxplot

# Print the boxplot
print(p4)

ggarrange(p3,p4)

# run statistics on controls vs all CF samples,
# in order to make this feasible in a linear model I have to revert the order of visit_sum
#
df$visit_sum_reorder <- factor(df$visit_sum, levels = c("Control", "1", "2", "3-5", "6-7", "8-10"))

# run CF vs Controls
#Axis 1
axis1.2 <- lm_corrected_pvalues_boxplot_reorder(df$x1, "Axis 1")
## [1] "Axis 1"
## New names:
## New names:
## • `` -> `...6`
axis1_lm_stats_reorder <- axis1.2$lm_stats # Access lm_stats
axis1_reorder <- axis1.2$boxplot # Access boxplot
axis1_reorder

axi1_wC <- axis1$boxplot +
  geom_signif(
    y_position = c(-0.6, -0.55, -0.5, -0.45),  
    xmin = c(2, 3, 4, 5), 
    xmax = c(6, 6, 6, 6),
    annotation = c("***", "***", "***", "***"), 
    tip_length = -0.03, 
    vjust = 1,
    size = 0.35
  )
axi1_wC

#Axis 2
axis2.2 <- lm_corrected_pvalues_boxplot_reorder(df$x2, "Axis 2")
## [1] "Axis 2"
## New names:
## New names:
## • `` -> `...6`
axis2_lm_stats_reorder <- axis2.2$lm_stats # Access lm_stats
axis2_reorder <- axis2.2$boxplot # Access boxplot
axis2_reorder

axi2_wC <- axis2$boxplot +
  geom_signif(
    y_position = c(-0.4, -0.35, -0.3, -0.25), 
    xmin = c(2, 3, 4, 5), 
    xmax = c(6, 6, 6, 6),
    annotation = c("ns", "ns", "ns", "ns"), 
    tip_length = -0.03, 
    vjust = 1,
    size = 0.35
  )
axi2_wC

# Arranging the plot

axis_stool <- ggarrange(axis1_den, axis2_den, axi1_wC, axi2_wC, 
          ncol = 2, nrow = 2,  align = "hv", 
          widths = c(1, 1),
          common.legend = F)
## Warning: Graphs cannot be vertically aligned unless the axis parameter is set.
## Placing graphs unaligned.
## Warning: Graphs cannot be horizontally aligned unless the axis parameter is
## set. Placing graphs unaligned.
axis_stool

12 Check nutrition and stool consistency on stool beta-diversity

### plot principal component analysis by nutrition
PCA_all_visit_stool_nutrition <- plot_ordination(ps_full_stool_j, ordinate(ps_full_stool_j, "MDS"), color = "basic_nutrition", shape="project")

project_label <- c("Control","CF")

PCA_all_visit_stool_nutrition+
  geom_point(size = 4)+
  ggtitle("Variation by time point with healthy controls")+
  #scale_color_manual(values = visit_sum_palette)+
  scale_shape_manual(values = c(23,15),labels = project_label)+
  theme_bw()+
  theme(text=element_text(size=24), legend.position = "right", legend.text = element_text(size=16), legend.title = element_text(size=18))+
  guides(color = guide_legend(title = "time points"))+
   guides(shape = guide_legend(title = "CF vs Control"))+
  stat_ellipse()
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_path()`).

### plot principal component analysis by bristol
PCA_all_visit_stool_bristol <- plot_ordination(ps_full_stool_j, ordinate(ps_full_stool_j, "MDS"), color = "bristol", shape="project")

PCA_all_visit_stool_bristol+
  geom_point(size = 4)+
  ggtitle("Variation by time point with healthy controls")+
  #scale_color_manual(values = visit_sum_palette)+
  scale_shape_manual(values = c(23,15),labels = project_label)+
  theme_bw()+
  theme(text=element_text(size=24), legend.position = "right", legend.text = element_text(size=16), legend.title = element_text(size=18))+
  guides(color = guide_legend(title = "Bristol stool scale"))+
   guides(shape = guide_legend(title = "CF vs Control"))+
  stat_ellipse()
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_path()`).

 #calculate BC_distance
set.seed(100)
BC_dist<- phyloseq::distance(ps_full_stool_j,
                             method="bray", weighted=F)
#extract metadata
metadata<- as(sample_data(ps_full_stool_j),"data.frame")
metadata$id.x
##   [1] "11"   "11"   "11"   "11"   "11"   "12"   "12"   "12"   "12"   "13"  
##  [11] "13"   "15"   "15"   "15"   "15"   "16"   "16"   "16"   "16"   "17"  
##  [21] "17"   "17"   "17"   "17"   "18"   "18"   "18"   "18"   "19"   "19"  
##  [31] "19"   "19"   "20"   "20"   "20"   "21"   "21"   "21"   "21"   "22"  
##  [41] "22"   "23"   "24"   "24"   "24"   "24"   "25"   "25"   "25"   "25"  
##  [51] "26"   "26"   "26"   "26"   "27"   "27"   "27"   "28"   "28"   "29"  
##  [61] "29"   "29"   "29"   "30"   "30"   "30"   "30"   "30"   "31"   "31"  
##  [71] "31"   "31"   "32"   "32"   "32"   "32"   "33"   "35"   "35"   "36"  
##  [81] "5"    "5"    "5"    "5"    "5"    "6"    "6"    "6"    "6"    "8"   
##  [91] "8"    "8"    "8"    "8"    "9"    "9"    "9"    "26"   "32"   "36"  
## [101] "9"    "11"   "11"   "15"   "15"   "16"   "17"   "18"   "21"   "21"  
## [111] "23"   "23"   "23"   "25"   "25"   "28"   "28"   "29"   "29"   "30"  
## [121] "31"   "32"   "33"   "36"   "37"   "38"   "39"   "40"   "41"   "5"   
## [131] "6"    "6"    "8"    "8"    "9"    "11"   "15"   "17"   "17"   "18"  
## [141] "19"   "19"   "21"   "21"   "23"   "24"   "24"   "25"   "25"   "26"  
## [151] "28"   "29"   "30"   "30"   "31"   "32"   "32"   "33"   "36"   "36"  
## [161] "38"   "38"   "39"   "40"   "41"   "41"   "42"   "42"   "43"   "5"   
## [171] "5"    "6"    "8"    "9"    "9"    "K001" "K002" "K003" "K004" "K006"
## [181] "K007" "K008" "K010" "K011" "K012" "K013" "K015" "11"   "15"   "17"  
## [191] "19"   "21"   "23"   "24"   "25"   "26"   "28"   "29"   "29"   "30"  
## [201] "31"   "32"   "33"   "36"   "37"   "39"   "40"   "40"   "41"   "42"  
## [211] "6"    "8"    "K009" "K016" "K017" "K019" "K020" "K022" "K023" "K029"
## [221] "K031" "K043" "K044" "K045" "K049" "19"   "38"   "39"   "40"   "41"  
## [231] "42"   "K040" "K042" "K050" "K052" "K053" "K005" "K018" "K021" "K024"
## [241] "K025" "K026" "K027" "K028" "K030" "K031" "K032" "K036" "K039" "K047"
## [251] "K048"
nutri <- vegan::adonis2(BC_dist ~ basic_nutrition + bristol + sex+ id.x,
              permutations = 999, na.action=na.exclude, data = metadata)
nutri

13 PERMANOVA in stool

# calculate BC_distance
set.seed(100)
BC_dist<- phyloseq::distance(ps_full_stool_j,
                             method="bray", weighted=F)
#extract metadata
metadata<- as(sample_data(ps_full_stool_j),"data.frame")

### stratified PERMANOVA per visit group
adonis2(formula = BC_dist ~ visit_sum + sex + age_y, data = metadata, permutations = 999, by = "margin", strata = metadata$id.x)
### stratified PERMANOVA health vs CF
adonis2(formula = BC_dist ~ project + sex + age_y, data = metadata, permutations = 999, by = "margin", strata = metadata$id.x)

14 compare CF and control BC-distances for stool

# calculate BC_distance
set.seed(100)
BC_dist<- phyloseq::distance(ps_full_stool_j,
                             method="bray", weighted=F)
BC_dist.stool <- as.matrix(BC_dist)
#BC_dist.stool[lower.tri(BC_dist.stool)] <- 0  # this is needed to remove the 0 latter, to keep only single pairs
BC_dist.stool_df <- reshape2::melt(BC_dist.stool)

tmp1 <- BC_dist.stool_df%>%
  filter(grepl("IMMP", Var1)) %>% #this keeps only control samples in Var 1
  filter(!grepl("IMMP", Var2))#%>%  #this filters control samples from Var 2, so that in the end only distances between COntrols and all CF samples are kept
  #filter(value!=0)#this removes the same sample distances, and the one from set 0 from above to keep a single pair

# merge with metadata to have visit_sums for CF samples
#extract metadata
metadata<- as(sample_data(ps_full_stool_j),"data.frame")
tmp1 <- tmp1 %>% 
  left_join(metadata, by=c("Var2"="x_sample_id")) %>% 
  mutate(comparison=paste(Var1, Var2, sep = "_")) %>% 
  distinct(comparison, .keep_all = T)

summary(tmp1$visit_sum)
##       1       2     3-5     6-7    8-10 Control 
##    1530    1170    3420    1800    1350       0
my_comp <- list(c("1","2"), c("1","3-5"), c("1","6-7"), c("1","8-10"))

# Calculate median and IQR
summary_stats <- tmp1 %>%
  group_by(visit_sum) %>%
  summarize(
    median = median(value),
    lower = quantile(value, 0.25),
    upper = quantile(value, 0.75)
  )

tmp1 %>% 
  ggplot(aes(visit_sum, value, fill=visit_sum))+
  geom_boxplot(outlier.shape = NA, alpha=0.7)+
  #geom_point()+
  theme_classic()+
  scale_fill_manual(values = visit_sum_palette)+
  ylab("BC distance: CF and Controls")+
  theme(legend.position = "none",text=element_text(size=20))+ #
  scale_x_discrete(labels= xlabels)+
  xlab("")+
  stat_compare_means(comparisons = my_comp, method = "wilcox.test", label = "p.adj")

ggsave("/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/figures/BC_stool_controls.png", width = 6, height = 5)

summary(lmerTest::lmer(value~visit_sum + age_y+sex+ (1|id.x)+ (1|Var1), data = tmp1))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: value ~ visit_sum + age_y + sex + (1 | id.x) + (1 | Var1)
##    Data: tmp1
## 
## REML criterion at convergence: -25960.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.2239 -0.6299  0.0563  0.6663  3.9718 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Var1     (Intercept) 0.001064 0.03262 
##  id.x     (Intercept) 0.005001 0.07072 
##  Residual             0.003390 0.05823 
## Number of obs: 9270, groups:  Var1, 45; id.x, 35
## 
## Fixed effects:
##                 Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)    6.686e-01  2.549e-02  2.769e+01  26.233  < 2e-16 ***
## visit_sum2     1.548e-02  2.329e-03  9.206e+03   6.649 3.13e-11 ***
## visit_sum3-5  -1.033e-02  1.934e-03  3.064e+03  -5.340 9.96e-08 ***
## visit_sum6-7  -8.608e-03  2.404e-03  7.528e+02  -3.581 0.000364 ***
## visit_sum8-10 -3.248e-02  2.753e-03  3.748e+02 -11.798  < 2e-16 ***
## age_y          4.152e-03  7.890e-04  3.148e+01   5.263 9.68e-06 ***
## sex2          -4.800e-04  2.410e-02  2.086e+01  -0.020 0.984302    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) vst_s2 vs_3-5 vs_6-7 v_8-10 age_y 
## visit_sum2   0.021                                   
## visit_sm3-5  0.167  0.545                            
## visit_sm6-7  0.288  0.459  0.662                     
## vist_sm8-10  0.355  0.413  0.628  0.655              
## age_y       -0.712 -0.082 -0.298 -0.453 -0.543       
## sex2        -0.403  0.008  0.032  0.046  0.058 -0.109
# below I plot the BC distance per patient and each control, with controls being on the x-axis, one can observe the consistency of BC distance per patient across controls, with visit_sum giving a distinct signal in each patient
tmp1 %>% 
  ggplot(aes(Var1, value, fill=visit_sum))+
  #geom_boxplot(outlier.shape = NA)+
  geom_point(aes(color=visit_sum), alpha=0.5)+
  theme_classic()+
  scale_color_manual(values = visit_sum_palette)+
  ylab("BC distance between CF and Controls")+
  theme(legend.position = "none",text=element_text(size=20))+ #
  #scale_x_discrete(labels= xlabels)+
  xlab("")+
  facet_wrap(~id.x)

15 stool linear model + boxplot

lm_boxplot_function <- function(dependent_variable, ylabel) {
  
  # Fit linear mixed-effects model
  print(summary(lmerTest::lmer(paste(dependent_variable, "~ visit_sum + sex + age_y + (1|id) + (1|Var1)"), data = tmp1)))
  lm <- summary(lmerTest::lmer(paste(dependent_variable, "~ visit_sum + sex + age_y + (1|id) + (1|Var1)"), data = tmp1))
  #lm <- summary(lmerTest::lmer(value ~ visit_sum + sex + age_y + (1|id), data = tmp1))
  # Extract coefficients and adjust p-values
  coefs <- data.frame(coef(lm))
  fdr <- p.adjust(coefs$Pr...t.., method = "fdr", n = nrow(coefs))
  
  # Create lm_stats table
  lm_stats <- bind_cols(coefs, fdr) %>%
    mutate(p = Pr...t..) %>%
    mutate(fdr = ...6) %>%
    select(-c(Pr...t.., ...6)) %>%
    rownames_to_column() %>%
    mutate(Months_after_ETI_start = rowname) %>%
    mutate(Months_after_ETI_start = case_when(
      Months_after_ETI_start == "(Intercept)" ~ "Baseline (Intercept)",
      Months_after_ETI_start == "visit_sum2" ~ "3 months",
      Months_after_ETI_start == "visit_sum3-5" ~ "6-12 months",
      Months_after_ETI_start == "visit_sum6-7" ~ "15-18 months",
      Months_after_ETI_start == "visit_sum8-10" ~ "21-24 months",
      Months_after_ETI_start %in% c("sex2", "age_y") ~ Months_after_ETI_start
    )) %>%
    mutate(fdr_star = case_when(
      fdr <= 0.001 ~ "***",
      fdr <= 0.01 ~ "**",
      fdr <= 0.05 ~ "*",
      fdr <= 0.1 ~ ".",
      fdr >= 0.1 ~ "ns"
    )) %>%
    select(-rowname) %>%
    select(Months_after_ETI_start, Estimate, Std..Error, df, t.value, p, fdr, fdr_star) %>%
    mutate(p = round(p, 5)) %>%
    mutate(fdr = round(fdr, 5))
  
  # Print lm_stats table
  print(lm_stats)
  
  # Save lm_stats table as HTML
  sjPlot::tab_df(lm_stats, title = paste("lmer(", dependent_variable, " ~ visit_sum + sex + age_y + (1|id), data=tmp1)"), file = paste("/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/table_outputs/", dependent_variable, "_table.html", sep = ""))
  
  # Print boxplot
  group1 <- c("(Intercept)", rep("1", nrow(lm_stats) - 1))
  
  lm_sig <- lm_stats %>%
    bind_cols(group1) %>%
    mutate(group1 = ...9) %>%
    mutate(group2 = case_when(
      Months_after_ETI_start == "Baseline (Intercept)" ~ "1",
      Months_after_ETI_start == "3 months" ~ "2",
      Months_after_ETI_start == "6-12 months" ~ "3-5",
      Months_after_ETI_start == "15-18 months" ~ "6-7",
      Months_after_ETI_start == "21-24 months" ~ "8-10",
      Months_after_ETI_start %in% c("sex2", "age_y") ~ Months_after_ETI_start
    )) %>%
    filter(group1 != "(Intercept)") %>%
    filter(Months_after_ETI_start!="sex2") %>% 
    filter(Months_after_ETI_start!="age_y")
  
  max_y <- max(tmp1[[dependent_variable]], na.rm = TRUE)
  min_y <- min(tmp1[[dependent_variable]], na.rm = TRUE)
  
 # Calculate the adjusted y.position for stat labels
stat_labels_y_position <- max_y + 0.1 * (max_y - min_y)

  xlabels <- c("0", "3", "6-12", "15-18", "21-24")
  
# Print boxplot
boxplot <- tmp1 %>%
  ggplot(aes(get(dependent_variable), x = visit_sum)) +
  geom_boxplot(aes(fill = visit_sum), outlier.shape = NA, alpha = 0.7) +
  scale_fill_manual(values = visit_sum_palette) +
  theme_classic() +
  ylab(ylabel) +
  xlab("Months from ETI treatment start") +
  scale_x_discrete(labels = xlabels) +
  theme(text = element_text(size = 18), legend.position = "none") +
   stat_pvalue_manual(lm_sig, label = "fdr_star", y.position = stat_labels_y_position, step.increase = 0.05)

return(list(lm_stats = lm_stats, boxplot = boxplot))
}

stool <- lm_boxplot_function("value", "Stool: BC distance - CF & Control")
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## paste(dependent_variable, "~ visit_sum + sex + age_y + (1|id) + (1|Var1)")
##    Data: tmp1
## 
## REML criterion at convergence: -25960.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.2239 -0.6299  0.0563  0.6663  3.9718 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Var1     (Intercept) 0.001064 0.03262 
##  id       (Intercept) 0.005001 0.07072 
##  Residual             0.003390 0.05823 
## Number of obs: 9270, groups:  Var1, 45; id, 35
## 
## Fixed effects:
##                 Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)    6.686e-01  2.549e-02  2.769e+01  26.233  < 2e-16 ***
## visit_sum2     1.548e-02  2.329e-03  9.206e+03   6.649 3.13e-11 ***
## visit_sum3-5  -1.033e-02  1.934e-03  3.064e+03  -5.340 9.96e-08 ***
## visit_sum6-7  -8.608e-03  2.404e-03  7.528e+02  -3.581 0.000364 ***
## visit_sum8-10 -3.248e-02  2.753e-03  3.748e+02 -11.798  < 2e-16 ***
## sex2          -4.800e-04  2.410e-02  2.086e+01  -0.020 0.984302    
## age_y          4.152e-03  7.890e-04  3.148e+01   5.263 9.68e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) vst_s2 vs_3-5 vs_6-7 v_8-10 sex2  
## visit_sum2   0.021                                   
## visit_sm3-5  0.167  0.545                            
## visit_sm6-7  0.288  0.459  0.662                     
## vist_sm8-10  0.355  0.413  0.628  0.655              
## sex2        -0.403  0.008  0.032  0.046  0.058       
## age_y       -0.712 -0.082 -0.298 -0.453 -0.543 -0.109
## New names:
## • `` -> `...6`
##   Months_after_ETI_start      Estimate   Std..Error         df      t.value
## 1   Baseline (Intercept)  0.6686460360 0.0254882860   27.69322  26.23346414
## 2               3 months  0.0154825436 0.0023287270 9205.85956   6.64850084
## 3            6-12 months -0.0103261566 0.0019336476 3064.43496  -5.34024741
## 4           15-18 months -0.0086080236 0.0024037139  752.81380  -3.58113489
## 5           21-24 months -0.0324778153 0.0027528313  374.79532 -11.79796791
## 6                   sex2 -0.0004799697 0.0241038700   20.86432  -0.01991256
## 7                  age_y  0.0041525010 0.0007890069   31.47633   5.26294642
##         p     fdr fdr_star
## 1 0.00000 0.00000      ***
## 2 0.00000 0.00000      ***
## 3 0.00000 0.00000      ***
## 4 0.00036 0.00042      ***
## 5 0.00000 0.00000      ***
## 6 0.98430 0.98430       ns
## 7 0.00001 0.00001      ***
## New names:
## • `` -> `...9`
p4 <- stool$boxplot
stool$lm_stats
# Fit your linear mixed-effects model
# For example, replace 'YourModel' and 'YourData' with your model and data
model <-  lmerTest::lmer(value ~ visit_sum + sex + age_y + (1|id) + (1|Var1), data = tmp1)

# Extract coefficients
coefficients <- coef(model)

# Extract coefficients
coefficients <- coef(model)

# Plot coefficients
  coef_p <- coefplot::coefplot(model, intercept = FALSE, color = "black")
  coef_p_s<- coef_p+
    theme_classic()+
    scale_y_discrete(labels= c("3 m", "6-12 m", "15-18 m", "21-24 m", "sex", "age"))+
    coord_flip()+
    xlab("Estimate")+
    ggtitle("")+
    ylab("")+
    theme(text = element_text(size = 18))
  coef_p_s

16 PERMANOVA on CF-Control distances in stool

# calculate BC_distance
set.seed(100)
BC_dist<- phyloseq::distance(ps_full_stool_j,
                             method="bray", weighted=F)
#extract metadata
metadata<- as(sample_data(ps_full_stool_j),"data.frame")

### unstratified

adonis2(formula = BC_dist ~ visit_sum, data = metadata, permutations = 999, by = "margin")
adonis2(formula = BC_dist ~ visit_sum + sex + age_y, data = metadata, permutations = 999, by = "margin")
adonis2(formula = BC_dist ~ visit_sum + sex + age_y +id.x, data = metadata, permutations = 999, by = "margin") # sex and age are not significant when including id.x as a explantory variable
adonis2(formula = BC_dist ~ visit_sum +id.x, data = metadata, permutations = 999, by = "margin") # sex and age are not significant when including id.x as a explantory variable
### stratified PERMANOVA per id
adonis2(formula = BC_dist ~ visit_sum , data = metadata, permutations = 999, by = "margin", strata = metadata$id.x)
adonis2(formula = BC_dist ~ visit_sum + project , data = metadata, permutations = 999, by = "margin", strata = metadata$id.x)
### stratified PERMANOVA per project (health vs CF)
adonis2(formula = BC_dist ~ visit_sum + id.x, data = metadata, permutations = 999, by = "margin", strata = metadata$project)
### unstratified PERMANOVA health vs CF
adonis2(formula = BC_dist ~ project, data = metadata, permutations = 999, by = "margin")
adonis2(formula = BC_dist ~ project + id.x, data = metadata, permutations = 999, by = "margin")
adonis2(formula = BC_dist ~ project + visit_sum, data = metadata, permutations = 999, by = "margin")
### stratified PERMANOVA health vs CF
adonis2(formula = BC_dist ~ project + visit_sum + sex + age_y, data = metadata, permutations = 999, by = "margin", strata = metadata$id.x)
adonis2(formula = BC_dist ~ project + id.x, data = metadata, permutations = 999, by = "margin", strata = metadata$visit_sum)
adonis2(formula = BC_dist ~ visit_sum + id.x, data = metadata, permutations = 999, by = "margin", strata = metadata$project)

17 PERMANOVA per time point comparison

# check baseline vs follow-up samples
metadata<- metadata %>% 
  mutate(ETI_yn = as_factor(case_when(visit_sum=="1"~"no", visit_sum=="Control"~"Control", T~"yes")))

summary(metadata$ETI_yn)
##      no     yes Control 
##      34     172      45
### plot principal component analysis by visit_sum
PCA_all_visit_stool_dominantGenus <- plot_ordination(ps_full_stool_j, ordinate(ps_full_stool_j, "MDS"), color = "visit_sum", shape="project")

project_label <- c("Control","CF")

labels <- c("Baseline", "ETI", "ETI", "ETI", "ETI", "Control")

bfu_palette <- c("black", "#55AD89FF","#55AD89FF","#55AD89FF","#55AD89FF","#8176AA" )

PCA_all_visit_stool_dominantGenus +
  geom_point(size = 4, alpha = 0.7) +
  scale_color_manual(values = bfu_palette, labels = xlabels) +
  scale_shape_manual(values = c(15, 16), labels = project_label) +
  theme_classic() +
  theme(text = element_text(size = 18), legend.position = "none", legend.text = element_text(size = 16), legend.title = element_blank()) +
  guides(color = guide_legend(title = "Months from treatment start", title.position = "left", ncol = 2))+
  stat_ellipse()

### stratified PERMANOVA control vs baseline vs follow-up
adonis2(formula = BC_dist ~ ETI_yn, data = metadata, permutations = 999, by = "margin", strata = metadata$id.x)
# Repeat PERMANOVA for separate time points, paired with baseline

# as there are no repeated samples no need for stratification
adonis2(formula = BC_v1 ~ project, data = metadata_v1, permutations = 999, by = "margin")
# now Control vs all follow-ups together (without baseline)

metadata_fu <- metadata %>%
  filter(ETI_yn %in% c("yes", "Control"))
# Extract relevant sample IDs
sample_ids <- metadata_fu$x_sample_id
# Filter the distance matrix to only include relevant rows and cols
BC_df <- as.matrix(BC_dist)
BC_df <- as.data.frame(BC_df)

# Filter rows based on sample IDs
df_fu <- BC_df[rownames(BC_df) %in% sample_ids, ]
# Filter columns based on sample IDs
df_fu <- df_fu[, colnames(df_fu) %in% sample_ids]

# Convert to matrix
BC_fu <- as.matrix(df_fu)

### stratified
adonis2(formula = BC_fu ~ visit_sum, data = metadata_fu, permutations = 999, by = "margin", strata = metadata_fu$id.x)
adonis2(formula = BC_fu ~ ETI_yn, data = metadata_fu, permutations = 999, by = "margin", strata = metadata_fu$id.x)
# now Control vs follow-ups separately
metadata_v2 <- metadata %>%
  filter(visit_sum %in% c("2", "Control"))
# Extract relevant sample IDs
sample_ids <- metadata_v2$x_sample_id

# Filter rows based on sample IDs
df_v2 <- BC_df[rownames(BC_df) %in% sample_ids, ]
# Filter columns based on sample IDs
df_v2 <- df_v2[, colnames(df_v2) %in% sample_ids]

# Convert to matrix
BC_v2 <- as.matrix(df_v2)

### unstratified
adonis2(formula = BC_v2 ~ visit_sum, data = metadata_v2, permutations = 999, by = "margin")
# as there are no repeated samples no need for stratification - it also gives an error
adonis2(formula = BC_v2 ~ project, data = metadata_v2, permutations = 999, by = "margin")
metadata_v3 <- metadata %>%
  filter(visit_sum %in% c("3-5", "Control"))
# Extract relevant sample IDs
sample_ids <- metadata_v3$x_sample_id

# Filter rows based on sample IDs
df_v3 <- BC_df[rownames(BC_df) %in% sample_ids, ]
# Filter columns based on sample IDs
df_v3 <- df_v3[, colnames(df_v3) %in% sample_ids]

# Convert to matrix
BC_v3 <- as.matrix(df_v3)

### unstratified
adonis2(formula = BC_v3 ~ visit_sum, data = metadata_v3, permutations = 999, by = "margin")
adonis2(formula = BC_v3 ~ project, data = metadata_v3, permutations = 999, by = "margin")
# as there are repeated samples, I will block id to verify this observation
adonis2(formula = BC_v3 ~ visit_sum, data = metadata_v3, permutations = 999, by = "margin", strata = metadata_v3$id.x)
metadata_v6 <- metadata %>%
  filter(visit_sum %in% c("6-7", "Control"))
# Extract relevant sample IDs
sample_ids <- metadata_v6$x_sample_id

# Filter rows based on sample IDs
df_v6 <- BC_df[rownames(BC_df) %in% sample_ids, ]
# Filter columns based on sample IDs
df_v6 <- df_v6[, colnames(df_v6) %in% sample_ids]

# Convert to matrix
BC_v6 <- as.matrix(df_v6)

### unstratified
adonis2(formula = BC_v6 ~ visit_sum, data = metadata_v6, permutations = 999, by = "margin")
adonis2(formula = BC_v6 ~ project, data = metadata_v6, permutations = 999, by = "margin")
# as there are repeated samples, I will block id to verify this observation
adonis2(formula = BC_v6 ~ visit_sum, data = metadata_v6, permutations = 999, by = "margin", strata = metadata_v6$id.x)
metadata_v8 <- metadata %>%
  filter(visit_sum %in% c("8-10", "Control"))
# Extract relevant sample IDs
sample_ids <- metadata_v8$x_sample_id

# Filter rows based on sample IDs
df_v8 <- BC_df[rownames(BC_df) %in% sample_ids, ]
# Filter columns based on sample IDs
df_v8 <- df_v8[, colnames(df_v8) %in% sample_ids]

# Convert to matrix
BC_v8 <- as.matrix(df_v8)

### unstratified
adonis2(formula = BC_v8 ~ visit_sum, data = metadata_v8, permutations = 999, by = "margin")
adonis2(formula = BC_v8 ~ project, data = metadata_v8, permutations = 999, by = "margin")
# as there are repeated samples, I will block id to verify this observation
adonis2(formula = BC_v8 ~ visit_sum, data = metadata_v8, permutations = 999, by = "margin", strata = metadata_v8$id.x)
## Set of permutations < 'minperm'. Generating entire set.

18 PERMANOVA on CF-health comparisons alone

# I reduce the distance matrix to only have CF samples in the rows, and only healthy samples in the columns, so that only CF to healthy comparisons are retained

# calculate BC_distance
set.seed(100)
BC_dist<- phyloseq::distance(ps_full_stool_j,
                             method="bray", weighted=F)

#extract metadata
metadata<- as(sample_data(ps_full_stool_j),"data.frame")

# Use grep to find columns that contain "IMMP" in their names
cols_to_keep <- metadata %>% 
  filter(project=="IMMPIMMP") %>% 
  select(x_sample_id)
cols_to_keep <- cols_to_keep$x_sample_id

rows_to_keep <- metadata %>% 
  filter(project=="IMP") %>% 
  select(x_sample_id)
rows_to_keep <- rows_to_keep$x_sample_id
# Convert the distance object to a matrix
BC_matrix <- as.matrix(BC_dist)

# Subset the matrix to keep only those columns that are from controls
BC_matrix_f <- BC_matrix[, cols_to_keep]
BC_matrix_f <- BC_matrix[rows_to_keep, ]

# no keep in the metadata only CF samples
metadata <- metadata %>% 
  filter(project=="IMP")

### unstratified
adonis2(formula = BC_matrix_f ~ visit_sum, data = metadata, permutations = 999, by = "margin")
adonis2(formula = BC_matrix_f ~ visit_sum + id.x, data = metadata, permutations = 999, by = "margin")
### stratified PERMANOVA per id
adonis2(formula = BC_matrix_f ~ visit_sum, data = metadata, permutations = 999, by = "margin", strata = metadata$id.x)
# check the impact of number of treatment days
adonis2(formula = BC_matrix_f ~ delta_ETI_merged, data = metadata, permutations = 999, by = "margin", strata = metadata$id.x)
# check baseline vs follow-up samples
metadata<- metadata %>% 
  mutate(ETI_yn = case_when(visit_sum=="1"~FALSE, T~TRUE))

### unstratified
adonis2(formula = BC_matrix_f ~ ETI_yn, data = metadata, permutations = 999, by = "margin")
adonis2(formula = BC_matrix_f ~ ETI_yn + id.x, data = metadata, permutations = 999, by = "margin")
### stratified PERMANOVA per id
adonis2(formula = BC_matrix_f ~ ETI_yn, data = metadata, permutations = 999, by = "margin", strata = metadata$id.x)
# Repeat PERMANOVA for separate time points, paired with baseline

md_v1v2 <- metadata %>%
  filter(visit_sum %in% c("1", "2"))

# Filter the distance matrix to only include relevant rows
BC_matrix_f <- as.data.frame(BC_matrix_f)
BC_v1v2 <- BC_matrix_f %>% 
  filter(rownames(BC_matrix_f) %in% c(md_v1v2$x_sample_id))

# Convert to matrix
BC_v1v2 <- as.matrix(BC_v1v2)

# Run PERMANOVA for specific time points
adonis2(formula = BC_v1v2 ~ visit_sum , data = md_v1v2, permutations = 999, by = "margin")
adonis2(formula = BC_v1v2 ~ visit_sum + id.x, data = md_v1v2, permutations = 999, by = "margin")
# Run PERMANOVA for visit_sum, controlling for id
adonis2(formula = BC_v1v2 ~ visit_sum, data = md_v1v2, permutations = 999, by = "margin", strata = md_v1v2$id.x)
# 6-12 months + baseline
md_v1v35 <- metadata %>%
  filter(visit_sum %in% c("1", "3-5"))

# Filter the distance matrix to only include relevant rows
BC_v35 <- BC_matrix_f %>% 
  filter(rownames(BC_matrix_f) %in% c(md_v1v35$x_sample_id))

# Convert to matrix
BC_v35 <- as.matrix(BC_v35)

# Run PERMANOVA for specific time points
adonis2(formula = BC_v35 ~ visit_sum , data = md_v1v35, permutations = 999, by = "margin")
adonis2(formula = BC_v35 ~ visit_sum + id.x, data = md_v1v35, permutations = 999, by = "margin")
# Run PERMANOVA for visit_sum, controlling for id
adonis2(formula = BC_v35 ~ visit_sum, data = md_v1v35, permutations = 999, by = "margin", strata = md_v1v35$id.x)
# 21-24 months + baseline
md_v1v89 <- metadata %>%
  filter(visit_sum %in% c("1", "8-10"))

# Filter the distance matrix to only include relevant rows
BC_v89 <- BC_matrix_f %>% 
  filter(rownames(BC_matrix_f) %in% c(md_v1v89$x_sample_id))

# Convert to matrix
BC_v89 <- as.matrix(BC_v89)

# Run PERMANOVA for specific time points
adonis2(formula = BC_v89 ~ visit_sum , data = md_v1v89, permutations = 999, by = "margin")
adonis2(formula = BC_v89 ~ visit_sum + id.x, data = md_v1v89, permutations = 999, by = "margin")
# Run PERMANOVA for visit_sum, controlling for id
adonis2(formula = BC_v89 ~ visit_sum, data = md_v1v89, permutations = 999, by = "margin", strata = md_v1v89$id.x)

19 combine plots

library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
p1 <- p1+
  ggtitle("Throat")

p5 <- p5+
  ggtitle("Stool")

grid_arranged_t <- grid.arrange(
  p1+ggtitle(""),
  NULL,
  plot_x_th+theme(axis.text.x = element_blank(), axis.title.x = element_blank(), axis.ticks = element_blank())+ggtitle("Throat")+labs(tag = "A") ,
  plot_y_th+theme(axis.text.y = element_blank(), axis.title.y = element_blank(), axis.ticks = element_blank()),
  p5+ggtitle(""),
  NULL,
  plot_x_st+theme(axis.text.x = element_blank(), axis.title.x = element_blank(), axis.ticks = element_blank())+ggtitle("Stool")+labs(tag = "D") ,
  plot_y_st+theme(axis.text.y = element_blank(), axis.title.y = element_blank(), axis.ticks = element_blank()),
  coef_p_t + labs(tag = "B"),
  p2 + labs(tag = "C"),
  coef_p_s + labs(tag = "E"),
  p4 + labs(tag = "F"),
  legend,
  ncol = 3, nrow = 5,
  heights = c(1, 2, 1,2, 0.5),
  widths=c(2,0.5,1.5),
  layout_matrix = rbind(c(3,2,9),
                        c(1,4,10),
                        c(7,6,11),
                        c(5,8,12),
                        c(13,13,13)
                      )
)

# Print or save the arranged plot
#ggsave("/Users/rebecca/Documents/Forschung/IMMProveCF/Manuscript_16S/Submission_CHM/Fig3.tiff",grid_arranged_t, dpi = 300, width = 16, height = 18)

# combine stats table

throat$lm_stats <- throat$lm_stats %>% 
  mutate(Sample="Throat") %>% 
  select(Sample, everything())

stool$lm_stats <- stool$lm_stats %>% 
  mutate(Sample="Stool") %>% 
  select(Sample, everything())

bc_stats <- rbind(throat$lm_stats, stool$lm_stats)
kable(bc_stats)
Sample Months_after_ETI_start Estimate Std..Error df t.value p fdr fdr_star
Throat Baseline (Intercept) 0.6295880 0.0246169 44.33130 25.5754517 0.00000 0.00000 ***
Throat 3 months -0.0422141 0.0046345 8252.29314 -9.1085902 0.00000 0.00000 ***
Throat 6-12 months -0.0277683 0.0041913 7721.41521 -6.6252986 0.00000 0.00000 ***
Throat 15-18 months -0.0403870 0.0047460 5152.04960 -8.5097037 0.00000 0.00000 ***
Throat 21-24 months -0.0197816 0.0049099 3025.76258 -4.0289486 0.00006 0.00008 ***
Throat sex2 0.0290054 0.0211133 30.49098 1.3737977 0.17952 0.17952 ns
Throat age_y 0.0018013 0.0007411 34.61843 2.4306602 0.02040 0.02380 *
Stool Baseline (Intercept) 0.6686460 0.0254883 27.69322 26.2334641 0.00000 0.00000 ***
Stool 3 months 0.0154825 0.0023287 9205.85956 6.6485008 0.00000 0.00000 ***
Stool 6-12 months -0.0103262 0.0019336 3064.43496 -5.3402474 0.00000 0.00000 ***
Stool 15-18 months -0.0086080 0.0024037 752.81380 -3.5811349 0.00036 0.00042 ***
Stool 21-24 months -0.0324778 0.0027528 374.79532 -11.7979679 0.00000 0.00000 ***
Stool sex2 -0.0004800 0.0241039 20.86432 -0.0199126 0.98430 0.98430 ns
Stool age_y 0.0041525 0.0007890 31.47633 5.2629464 0.00001 0.00001 ***
#write_csv(bc_stats, "/Users/rebecca/Documents/Forschung/IMMProveCF/R_analysis/table_outputs/lmm_bc_distance_control.csv")
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] gridExtra_2.3     coefplot_1.2.8    lmerTest_3.1-3    lme4_1.1-30      
##  [5] Matrix_1.4-0      vegan_2.6-6.1     lattice_0.20-45   permute_0.9-7    
##  [9] dendextend_1.17.1 ggpubr_0.6.0      readxl_1.4.2      naniar_1.1.0     
## [13] knitr_1.47        microbiome_1.16.0 janitor_2.1.0     magrittr_2.0.3   
## [17] phyloseq_1.38.0   lubridate_1.9.2   forcats_1.0.0     stringr_1.5.1    
## [21] dplyr_1.1.4       purrr_1.0.2       readr_2.1.4       tidyr_1.3.1      
## [25] tibble_3.2.1      ggplot2_3.5.1     tidyverse_2.0.0   microViz_0.9.3   
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.4.1        systemfonts_1.0.4      plyr_1.8.7            
##   [4] igraph_1.4.1           splines_4.1.0          TH.data_1.1-0         
##   [7] GenomeInfoDb_1.30.1    digest_0.6.36          useful_1.2.6.1        
##  [10] foreach_1.5.2          htmltools_0.5.8.1      viridis_0.6.5         
##  [13] fansi_1.0.3            cluster_2.1.2          tzdb_0.4.0            
##  [16] Biostrings_2.62.0      modelr_0.1.11          sandwich_3.0-1        
##  [19] timechange_0.2.0       colorspace_2.0-3       textshaping_0.3.6     
##  [22] xfun_0.45              crayon_1.5.1           RCurl_1.98-1.12       
##  [25] jsonlite_1.8.8         zoo_1.8-10             survival_3.2-13       
##  [28] iterators_1.0.14       ape_5.8                glue_1.6.2            
##  [31] gtable_0.3.5           zlibbioc_1.40.0        emmeans_1.7.2         
##  [34] XVector_0.34.0         sjstats_0.18.1         sjmisc_2.8.9          
##  [37] car_3.0-12             Rhdf5lib_1.16.0        BiocGenerics_0.40.0   
##  [40] abind_1.4-5            scales_1.3.0           mvtnorm_1.1-3         
##  [43] DBI_1.2.3              ggeffects_1.1.1        rstatix_0.7.2         
##  [46] Rcpp_1.0.9             performance_0.8.0      viridisLite_0.4.2     
##  [49] xtable_1.8-4           stats4_4.1.0           datawizard_0.2.3      
##  [52] pkgconfig_2.0.3        farver_2.1.1           sass_0.4.9            
##  [55] utf8_1.2.2             tidyselect_1.2.1       labeling_0.4.2        
##  [58] rlang_1.1.4            reshape2_1.4.4         effectsize_0.6.0.1    
##  [61] munsell_0.5.0          cellranger_1.1.0       tools_4.1.0           
##  [64] cachem_1.0.6           cli_3.6.3              generics_0.1.3        
##  [67] pacman_0.5.1           sjlabelled_1.1.8       ade4_1.7-22           
##  [70] broom_1.0.6            evaluate_0.24.0        biomformat_1.22.0     
##  [73] fastmap_1.2.0          yaml_2.3.5             ragg_1.2.5            
##  [76] visdat_0.6.0           nlme_3.1-155           compiler_4.1.0        
##  [79] rstudioapi_0.16.0      ggsignif_0.6.3         bslib_0.7.0           
##  [82] stringi_1.7.8          highr_0.11             parameters_0.16.0     
##  [85] nloptr_2.0.3           multtest_2.50.0        vctrs_0.6.5           
##  [88] pillar_1.9.0           lifecycle_1.0.4        rhdf5filters_1.6.0    
##  [91] jquerylib_0.1.4        estimability_1.3       data.table_1.14.2     
##  [94] cowplot_1.1.3          bitops_1.0-7           insight_0.16.0        
##  [97] R6_2.5.1               IRanges_2.28.0         codetools_0.2-18      
## [100] boot_1.3-28            MASS_7.3-55            rhdf5_2.38.1          
## [103] withr_2.5.0            multcomp_1.4-18        S4Vectors_0.32.4      
## [106] GenomeInfoDbData_1.2.7 mgcv_1.8-38            bayestestR_0.11.5     
## [109] parallel_4.1.0         hms_1.1.3              grid_4.1.0            
## [112] sjPlot_2.8.10          coda_0.19-4            minqa_1.2.4           
## [115] rmarkdown_2.27         snakecase_0.11.0       carData_3.0-5         
## [118] Rtsne_0.17             numDeriv_2016.8-1.1    Biobase_2.54.0